From 426f37cbcbb15641006fd5d5a3ccac2a7b507daa Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 12 Jun 2023 16:19:56 +0200 Subject: [PATCH 1/3] Enhance NFD section at README Signed-off-by: Carlos Eduardo Arango Gutierrez --- README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index dc2374c1..1cdc976d 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,8 @@ For more information please visit the official [documentation](https://docs.nvid ## Prerequisites ### Kubernetes Node Feature Discovery (NFD) NVIDIA Network operator relies on Node labeling to get the cluster to the desired state. -[Node Feature Discovery](https://github.com/kubernetes-sigs/node-feature-discovery) `v0.10.1` or newer is expected to be deployed to provide the appropriate labeling: +[Node Feature Discovery](https://github.com/kubernetes-sigs/node-feature-discovery) `v0.13.2` or newer is deployed by default via Helm chart installation. +NFD is used to label nodes with the following labels: - PCI vendor and device information - RDMA capability @@ -58,19 +59,23 @@ NVIDIA Network operator relies on Node labeling to get the cluster to the desire __Example NFD worker configurations:__ ```yaml -sources: - custom: - pci: - deviceClassWhitelist: - - "02" - - "0200" - - "0207" - deviceLabelFields: - - "vendor" + config: + sources: + pci: + deviceClassWhitelist: + - "02" + - "0200" + - "0207" + - "0300" + - "0302" + deviceLabelFields: + - vendor ``` >\* Required for GPUDirect driver container deployment +If NFD is already deployed in the cluster, make sure to pass `--set nfd.enabled=false` to the `helm install` command to avoid conflicts. 
+ ## Resource Definitions The Operator Acts on the following CRDs: From d1c2b99855aa9990de8b7db9786f10d647084a48 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 12 Jun 2023 16:20:16 +0200 Subject: [PATCH 2/3] Sync NFD Chart values with those in the GPU-Operator Signed-off-by: Carlos Eduardo Arango Gutierrez --- Dockerfile | 3 +- deployment/network-operator/Chart.yaml | 2 +- .../charts/node-feature-discovery/.helmignore | 23 + .../charts/node-feature-discovery/Chart.yaml | 14 + .../charts/node-feature-discovery/README.md | 10 + .../crds/nfd-api-crds.yaml | 363 +++++++++++++ .../templates/_helpers.tpl | 107 ++++ .../templates/cert-manager-certs.yaml | 67 +++ .../templates/cert-manager-issuer.yaml | 42 ++ .../templates/clusterrole.yaml | 97 ++++ .../templates/clusterrolebinding.yaml | 52 ++ .../templates/master.yaml | 145 ++++++ .../templates/nfd-master-conf.yaml | 10 + .../templates/nfd-topologyupdater-conf.yaml | 10 + .../templates/nfd-worker-conf.yaml | 10 + .../templates/role.yaml | 18 + .../templates/rolebinding.yaml | 17 + .../templates/service.yaml | 18 + .../templates/serviceaccount.yaml | 58 +++ .../templates/topology-gc.yaml | 64 +++ .../templates/topologyupdater-crds.yaml | 278 ++++++++++ .../templates/topologyupdater.yaml | 142 +++++ .../templates/worker.yaml | 144 ++++++ .../charts/node-feature-discovery/values.yaml | 484 ++++++++++++++++++ .../templates/upgrade-crd.yaml | 3 + deployment/network-operator/values.yaml | 40 +- hack/templates/values/values.template | 40 +- 27 files changed, 2229 insertions(+), 32 deletions(-) create mode 100644 deployment/network-operator/charts/node-feature-discovery/.helmignore create mode 100644 deployment/network-operator/charts/node-feature-discovery/Chart.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/README.md create mode 100644 deployment/network-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml create mode 100644 
deployment/network-operator/charts/node-feature-discovery/templates/_helpers.tpl create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/clusterrole.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/master.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/role.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/rolebinding.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/service.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/serviceaccount.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/topology-gc.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater-crds.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/templates/worker.yaml create mode 100644 deployment/network-operator/charts/node-feature-discovery/values.yaml diff --git a/Dockerfile b/Dockerfile index ffe96743..06e1a3d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,8 @@ COPY 
deployment/network-operator chart # copy CRDs from helm charts RUN mkdir crds && \ cp -r chart/crds /workspace/crds/network-operator/ && \ - cp -r chart/charts/sriov-network-operator/crds /workspace/crds/sriov-network-operator/ + cp -r chart/charts/sriov-network-operator/crds /workspace/crds/sriov-network-operator/ && \ + cp -r chart/charts/node-feature-discovery/crds /workspace/crds/node-feature-discovery/ # Build RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o manager main.go diff --git a/deployment/network-operator/Chart.yaml b/deployment/network-operator/Chart.yaml index 34e41cd3..b3052945 100644 --- a/deployment/network-operator/Chart.yaml +++ b/deployment/network-operator/Chart.yaml @@ -15,7 +15,7 @@ dependencies: - condition: nfd.enabled name: node-feature-discovery repository: "http://kubernetes-sigs.github.io/node-feature-discovery/charts" - version: 0.10.1 + version: 0.13.2 - condition: sriovNetworkOperator.enabled name: sriov-network-operator repository: "" diff --git a/deployment/network-operator/charts/node-feature-discovery/.helmignore b/deployment/network-operator/charts/node-feature-discovery/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/network-operator/charts/node-feature-discovery/Chart.yaml b/deployment/network-operator/charts/node-feature-discovery/Chart.yaml new file mode 100644 index 00000000..40bc42c0 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +appVersion: v0.13.2 +description: 'Detects hardware features available on each node in a Kubernetes cluster, + and advertises those features using node labels. ' +home: https://github.com/kubernetes-sigs/node-feature-discovery +keywords: +- feature-discovery +- feature-detection +- node-labels +name: node-feature-discovery +sources: +- https://github.com/kubernetes-sigs/node-feature-discovery +type: application +version: 0.13.2 diff --git a/deployment/network-operator/charts/node-feature-discovery/README.md b/deployment/network-operator/charts/node-feature-discovery/README.md new file mode 100644 index 00000000..628ac6a3 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/README.md @@ -0,0 +1,10 @@ +# Node Feature Discovery + +Node Feature Discovery (NFD) is a Kubernetes add-on for detecting hardware +features and system configuration. Detected features are advertised as node +labels. NFD provides flexible configuration and extension points for a wide +range of vendor and application specific node labeling needs. + +See +[NFD documentation](https://kubernetes-sigs.github.io/node-feature-discovery/v0.13/deployment/helm.html) +for deployment instructions. 
diff --git a/deployment/network-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml b/deployment/network-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml new file mode 100644 index 00000000..775536f2 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml @@ -0,0 +1,363 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: nodefeatures.nfd.k8s-sigs.io +spec: + group: nfd.k8s-sigs.io + names: + kind: NodeFeature + listKind: NodeFeatureList + plural: nodefeatures + singular: nodefeature + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeFeature resource holds the features discovered for one node + in the cluster. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: NodeFeatureSpec describes a NodeFeature object. + properties: + features: + description: Features is the full "raw" features data that has been + discovered. + properties: + attributes: + additionalProperties: + description: AttributeFeatureSet is a set of features having + string value. 
+ properties: + elements: + additionalProperties: + type: string + type: object + required: + - elements + type: object + description: Attributes contains all the attribute-type features + of the node. + type: object + flags: + additionalProperties: + description: FlagFeatureSet is a set of simple features only + containing names without values. + properties: + elements: + additionalProperties: + description: Nil is a dummy empty struct for protobuf + compatibility + type: object + type: object + required: + - elements + type: object + description: Flags contains all the flag-type features of the + node. + type: object + instances: + additionalProperties: + description: InstanceFeatureSet is a set of features each of + which is an instance having multiple attributes. + properties: + elements: + items: + description: InstanceFeature represents one instance of + a complex features, e.g. a device. + properties: + attributes: + additionalProperties: + type: string + type: object + required: + - attributes + type: object + type: array + required: + - elements + type: object + description: Instances contains all the instance-type features + of the node. + type: object + type: object + labels: + additionalProperties: + type: string + description: Labels is the set of node labels that are requested to + be created. 
+ type: object + type: object + required: + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.2 + creationTimestamp: null + name: nodefeaturerules.nfd.k8s-sigs.io +spec: + group: nfd.k8s-sigs.io + names: + kind: NodeFeatureRule + listKind: NodeFeatureRuleList + plural: nodefeaturerules + shortNames: + - nfr + singular: nodefeaturerule + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeFeatureRule resource specifies a configuration for feature-based + customization of node objects, such as node labeling. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: NodeFeatureRuleSpec describes a NodeFeatureRule. + properties: + rules: + description: Rules is a list of node customization rules. + items: + description: Rule defines a rule for node customization such as + labeling. + properties: + extendedResources: + additionalProperties: + type: string + description: ExtendedResources to create if the rule matches. + type: object + labels: + additionalProperties: + type: string + description: Labels to create if the rule matches. 
+ type: object + labelsTemplate: + description: LabelsTemplate specifies a template to expand for + dynamically generating multiple labels. Data (after template + expansion) must be keys with an optional value ([=]) + separated by newlines. + type: string + matchAny: + description: MatchAny specifies a list of matchers one of which + must match. + items: + description: MatchAnyElem specifies one sub-matcher of MatchAny. + properties: + matchFeatures: + description: MatchFeatures specifies a set of matcher + terms all of which must match. + items: + description: FeatureMatcherTerm defines requirements + against one feature set. All requirements (specified + as MatchExpressions) are evaluated against each element + in the feature set. + properties: + feature: + type: string + matchExpressions: + additionalProperties: + description: "MatchExpression specifies an expression + to evaluate against a set of input values. It + contains an operator that is applied when matching + the input and an array of values that the operator + evaluates the input against. \n NB: CreateMatchExpression + or MustCreateMatchExpression() should be used + for creating new instances. \n NB: Validate() + must be called if Op or Value fields are modified + or if a new instance is created from scratch + without using the helper functions." + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: Value is the list of values that + the operand evaluates the input against. + Value should be empty if the operator is + Exists, DoesNotExist, IsTrue or IsFalse. + Value should contain exactly one element + if the operator is Gt or Lt and exactly + two elements if the operator is GtLt. In + other cases Value should contain at least + one element. 
+ items: + type: string + type: array + required: + - op + type: object + description: MatchExpressionSet contains a set of + MatchExpressions, each of which is evaluated against + a set of input values. + type: object + required: + - feature + - matchExpressions + type: object + type: array + required: + - matchFeatures + type: object + type: array + matchFeatures: + description: MatchFeatures specifies a set of matcher terms + all of which must match. + items: + description: FeatureMatcherTerm defines requirements against + one feature set. All requirements (specified as MatchExpressions) + are evaluated against each element in the feature set. + properties: + feature: + type: string + matchExpressions: + additionalProperties: + description: "MatchExpression specifies an expression + to evaluate against a set of input values. It contains + an operator that is applied when matching the input + and an array of values that the operator evaluates + the input against. \n NB: CreateMatchExpression or + MustCreateMatchExpression() should be used for creating + new instances. \n NB: Validate() must be called if + Op or Value fields are modified or if a new instance + is created from scratch without using the helper functions." + properties: + op: + description: Op is the operator to be applied. + enum: + - In + - NotIn + - InRegexp + - Exists + - DoesNotExist + - Gt + - Lt + - GtLt + - IsTrue + - IsFalse + type: string + value: + description: Value is the list of values that the + operand evaluates the input against. Value should + be empty if the operator is Exists, DoesNotExist, + IsTrue or IsFalse. Value should contain exactly + one element if the operator is Gt or Lt and exactly + two elements if the operator is GtLt. In other + cases Value should contain at least one element. 
+ items: + type: string + type: array + required: + - op + type: object + description: MatchExpressionSet contains a set of MatchExpressions, + each of which is evaluated against a set of input values. + type: object + required: + - feature + - matchExpressions + type: object + type: array + name: + description: Name of the rule. + type: string + taints: + description: Taints to create if the rule matches. + items: + description: The node this Taint is attached to has the "effect" + on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. The effect of the taint on pods + that do not tolerate the taint. Valid effects are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the + taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + type: array + vars: + additionalProperties: + type: string + description: Vars is the variables to store if the rule matches. + Variables do not directly inflict any changes in the node + object. However, they can be referenced from other rules enabling + more complex rule hierarchies, without exposing intermediary + output values as labels. + type: object + varsTemplate: + description: VarsTemplate specifies a template to expand for + dynamically generating multiple variables. Data (after template + expansion) must be keys with an optional value ([=]) + separated by newlines. 
+ type: string + required: + - name + type: object + type: array + required: + - rules + type: object + required: + - spec + type: object + served: true + storage: true diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/_helpers.tpl b/deployment/network-operator/charts/node-feature-discovery/templates/_helpers.tpl new file mode 100644 index 00000000..5a0a5c97 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/_helpers.tpl @@ -0,0 +1,107 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "node-feature-discovery.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "node-feature-discovery.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "node-feature-discovery.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "node-feature-discovery.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "node-feature-discovery.labels" -}} +helm.sh/chart: {{ include "node-feature-discovery.chart" . }} +{{ include "node-feature-discovery.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "node-feature-discovery.selectorLabels" -}} +app.kubernetes.io/name: {{ include "node-feature-discovery.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Create the name of the service account which the nfd master will use +*/}} +{{- define "node-feature-discovery.master.serviceAccountName" -}} +{{- if .Values.master.serviceAccount.create -}} + {{ default (include "node-feature-discovery.fullname" .) 
.Values.master.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.master.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account which the nfd worker will use +*/}} +{{- define "node-feature-discovery.worker.serviceAccountName" -}} +{{- if .Values.worker.serviceAccount.create -}} + {{ default (printf "%s-worker" (include "node-feature-discovery.fullname" .)) .Values.worker.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.worker.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account which topologyUpdater will use +*/}} +{{- define "node-feature-discovery.topologyUpdater.serviceAccountName" -}} +{{- if .Values.topologyUpdater.serviceAccount.create -}} + {{ default (printf "%s-topology-updater" (include "node-feature-discovery.fullname" .)) .Values.topologyUpdater.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.topologyUpdater.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account which topologyGC will use +*/}} +{{- define "node-feature-discovery.topologyGC.serviceAccountName" -}} +{{- if .Values.topologyGC.serviceAccount.create -}} + {{ default (printf "%s-topology-gc" (include "node-feature-discovery.fullname" .)) .Values.topologyGC.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.topologyGC.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml new file mode 100644 index 00000000..ac2e51fc --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml @@ -0,0 +1,67 @@ +{{- if .Values.tls.certManager }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: nfd-master-cert + namespace: {{ include 
"node-feature-discovery.namespace" . }} +spec: + secretName: nfd-master-cert + subject: + organizations: + - node-feature-discovery + commonName: nfd-master + dnsNames: + # must match the service name + - {{ include "node-feature-discovery.fullname" . }}-master + # first one is configured for use by the worker; below are for completeness + - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc + - {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local + # localhost needed for grpc_health_probe + - localhost + issuerRef: + name: nfd-ca-issuer + kind: Issuer + group: cert-manager.io + +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: nfd-worker-cert + namespace: {{ include "node-feature-discovery.namespace" . }} +spec: + secretName: nfd-worker-cert + subject: + organizations: + - node-feature-discovery + commonName: nfd-worker + dnsNames: + - {{ include "node-feature-discovery.fullname" . }}-worker.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local + issuerRef: + name: nfd-ca-issuer + kind: Issuer + group: cert-manager.io + +{{- if .Values.topologyUpdater.enable }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: nfd-topology-updater-cert + namespace: {{ include "node-feature-discovery.namespace" . }} +spec: + secretName: nfd-topology-updater-cert + subject: + organizations: + - node-feature-discovery + commonName: nfd-topology-updater + dnsNames: + - {{ include "node-feature-discovery.fullname" . }}-topology-updater.{{ include "node-feature-discovery.namespace" . 
}}.svc.cluster.local + issuerRef: + name: nfd-ca-issuer + kind: Issuer + group: cert-manager.io +{{- end }} + +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml new file mode 100644 index 00000000..f3c57ace --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml @@ -0,0 +1,42 @@ +{{- if .Values.tls.certManager }} +# See https://cert-manager.io/docs/configuration/selfsigned/#bootstrapping-ca-issuers +# - Create a self signed issuer +# - Use this to create a CA cert +# - Use this to now create a CA issuer +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: nfd-ca-bootstrap + namespace: {{ include "node-feature-discovery.namespace" . }} +spec: + selfSigned: {} + +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: nfd-ca-cert + namespace: {{ include "node-feature-discovery.namespace" . }} +spec: + isCA: true + secretName: nfd-ca-cert + subject: + organizations: + - node-feature-discovery + commonName: nfd-ca-cert + issuerRef: + name: nfd-ca-bootstrap + kind: Issuer + group: cert-manager.io + +--- +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: nfd-ca-issuer + namespace: {{ include "node-feature-discovery.namespace" . }} +spec: + ca: + secretName: nfd-ca-cert +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/clusterrole.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/clusterrole.yaml new file mode 100644 index 00000000..84b32644 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/clusterrole.yaml @@ -0,0 +1,97 @@ +{{- if .Values.master.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "node-feature-discovery.fullname" . 
}} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/status + verbs: + - get + - patch + - update + - list +- apiGroups: + - nfd.k8s-sigs.io + resources: + - nodefeatures + - nodefeaturerules + verbs: + - get + - list + - watch +{{- end }} + +--- +{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes/proxy + verbs: + - get +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - topology.node.k8s.io + resources: + - noderesourcetopologies + verbs: + - create + - get + - update +{{- end }} + +--- +{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-gc + labels: + {{- include "node-feature-discovery.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - nodes/proxy + verbs: + - get +- apiGroups: + - topology.node.k8s.io + resources: + - noderesourcetopologies + verbs: + - delete + - list +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml new file mode 100644 index 00000000..b0a69012 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml @@ -0,0 +1,52 @@ +{{- if .Values.master.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "node-feature-discovery.fullname" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "node-feature-discovery.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ include "node-feature-discovery.master.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . }} +{{- end }} + +--- +{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater +subjects: +- kind: ServiceAccount + name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . 
}} +{{- end }} + +--- +{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-gc + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "node-feature-discovery.fullname" . }}-topology-gc +subjects: +- kind: ServiceAccount + name: {{ include "node-feature-discovery.topologyGC.serviceAccountName" . }} # resolved via the shared helper, like the master/topology-updater subjects + namespace: {{ include "node-feature-discovery.namespace" . }} +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/master.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/master.yaml new file mode 100644 index 00000000..418ac089 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/master.yaml @@ -0,0 +1,145 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-master + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + role: master + annotations: + {{- toYaml .Values.master.deploymentAnnotations | nindent 4 }} +spec: + replicas: {{ .Values.master.replicaCount }} + selector: + matchLabels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }} + role: master + template: + metadata: + labels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }} + role: master + annotations: + {{- toYaml .Values.master.annotations | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "node-feature-discovery.master.serviceAccountName" . 
}} + enableServiceLinks: false + securityContext: + {{- toYaml .Values.master.podSecurityContext | nindent 8 }} + containers: + - name: master + securityContext: + {{- toYaml .Values.master.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + livenessProbe: + exec: + command: + - "/usr/bin/grpc_health_probe" + - "-addr=:{{ .Values.master.port | default "8080" }}" + {{- if .Values.tls.enable }} + - "-tls" + - "-tls-ca-cert=/etc/kubernetes/node-feature-discovery/certs/ca.crt" + - "-tls-client-key=/etc/kubernetes/node-feature-discovery/certs/tls.key" + - "-tls-client-cert=/etc/kubernetes/node-feature-discovery/certs/tls.crt" + {{- end }} + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + exec: + command: + - "/usr/bin/grpc_health_probe" + - "-addr=:{{ .Values.master.port | default "8080" }}" + {{- if .Values.tls.enable }} + - "-tls" + - "-tls-ca-cert=/etc/kubernetes/node-feature-discovery/certs/ca.crt" + - "-tls-client-key=/etc/kubernetes/node-feature-discovery/certs/tls.key" + - "-tls-client-cert=/etc/kubernetes/node-feature-discovery/certs/tls.crt" + {{- end }} + initialDelaySeconds: 5 + periodSeconds: 10 + failureThreshold: 10 + ports: + - containerPort: {{ .Values.master.port | default "8080" }} + name: grpc + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: + - "nfd-master" + resources: + {{- toYaml .Values.master.resources | nindent 12 }} + args: + {{- if .Values.master.instance | empty | not }} + - "-instance={{ .Values.master.instance }}" + {{- end }} + - "-port={{ .Values.master.port | default "8080" }}" + {{- if .Values.enableNodeFeatureApi }} + - "-enable-nodefeature-api" + {{- end }} + {{- if .Values.master.extraLabelNs | empty | not }} + - "-extra-label-ns={{- join "," .Values.master.extraLabelNs }}" + {{- end }} + {{- if .Values.master.denyLabelNs | empty | not }} + - 
"-deny-label-ns={{- join "," .Values.master.denyLabelNs }}" + {{- end }} + {{- if .Values.master.resourceLabels | empty | not }} + - "-resource-labels={{- join "," .Values.master.resourceLabels }}" + {{- end }} + {{- if .Values.master.enableTaints }} + - "-enable-taints" + {{- end }} + {{- if .Values.master.crdController | kindIs "invalid" | not }} + - "-crd-controller={{ .Values.master.crdController }}" + {{- else }} + ## By default, disable crd controller for other than the default instances + - "-crd-controller={{ .Values.master.instance | empty }}" + {{- end }} + {{- if .Values.master.featureRulesController | kindIs "invalid" | not }} + - "-featurerules-controller={{ .Values.master.featureRulesController }}" + {{- end }} + {{- if .Values.tls.enable }} + - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt" + - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key" + - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt" + {{- end }} + volumeMounts: + {{- if .Values.tls.enable }} + - name: nfd-master-cert + mountPath: "/etc/kubernetes/node-feature-discovery/certs" + readOnly: true + {{- end }} + - name: nfd-master-conf + mountPath: "/etc/kubernetes/node-feature-discovery" + readOnly: true + volumes: + {{- if .Values.tls.enable }} + - name: nfd-master-cert + secret: + secretName: nfd-master-cert + {{- end }} + - name: nfd-master-conf + configMap: + name: {{ include "node-feature-discovery.fullname" . }}-master-conf + items: + - key: nfd-master.conf + path: nfd-master.conf + + {{- with .Values.master.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.master.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.master.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml new file mode 100644 index 00000000..c806a8e5 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-master-conf + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +data: + nfd-master.conf: |- + {{- .Values.master.config | toYaml | nindent 4 }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml new file mode 100644 index 00000000..9867f508 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +data: + nfd-topology-updater.conf: |- + {{- .Values.topologyUpdater.config | toYaml | nindent 4 }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml new file mode 100644 index 00000000..61d2a481 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "node-feature-discovery.fullname" . 
}}-worker-conf + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +data: + nfd-worker.conf: |- + {{- .Values.worker.config | toYaml | nindent 4 }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/role.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/role.yaml new file mode 100644 index 00000000..f63cb8ff --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/role.yaml @@ -0,0 +1,18 @@ +{{- if .Values.worker.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-worker + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +rules: +- apiGroups: + - nfd.k8s-sigs.io + resources: + - nodefeatures + verbs: + - create + - get + - update +{{- end }} + diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/rolebinding.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/rolebinding.yaml new file mode 100644 index 00000000..30a00381 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.worker.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-worker + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "node-feature-discovery.fullname" . }}-worker +subjects: +- kind: ServiceAccount + name: {{ include "node-feature-discovery.worker.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . 
}} +{{- end }} + diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/service.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/service.yaml new file mode 100644 index 00000000..0d478981 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-master + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + role: master +spec: + type: {{ .Values.master.service.type }} + ports: + - port: {{ .Values.master.service.port | default "8080" }} + targetPort: grpc + protocol: TCP + name: grpc + selector: + {{- include "node-feature-discovery.selectorLabels" . | nindent 4 }} + role: master diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/serviceaccount.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/serviceaccount.yaml new file mode 100644 index 00000000..03211e7c --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/serviceaccount.yaml @@ -0,0 +1,58 @@ +{{- if .Values.master.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "node-feature-discovery.master.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + {{- with .Values.master.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} + +--- +{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . 
}} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + {{- with .Values.topologyUpdater.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} + +--- +{{- if and .Values.topologyGC.enable .Values.topologyGC.serviceAccount.create .Values.topologyUpdater.enable }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }} + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + {{- with .Values.topologyGC.serviceAccount.annotations }}{{/* annotations of the topology-gc account itself, read from the same topologyGC.serviceAccount map as create/name above */}} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} + +--- +{{- if .Values.worker.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "node-feature-discovery.worker.serviceAccountName" . }} + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + {{- with .Values.worker.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/topology-gc.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/topology-gc.yaml new file mode 100644 index 00000000..642fec45 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/topology-gc.yaml @@ -0,0 +1,64 @@ +{{- if and .Values.topologyGC.enable .Values.topologyUpdater.enable -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-gc + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . 
| nindent 4 }} + role: topology-gc +spec: + replicas: {{ .Values.topologyGC.replicaCount | default 1 }} + selector: + matchLabels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }} + role: topology-gc + template: + metadata: + labels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }} + role: topology-gc + annotations: + {{- toYaml .Values.topologyGC.annotations | nindent 8 }} + spec: + serviceAccountName: {{ .Values.topologyGC.serviceAccount.name | default .Values.topologyGC.serviceAccountName | default "nfd-topology-gc" }}{{/* same key the topology-gc ServiceAccount and ClusterRoleBinding subject are named from; topologyGC.serviceAccountName is honored only as a legacy fallback */}} + dnsPolicy: ClusterFirstWithHostNet + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.topologyGC.podSecurityContext | nindent 8 }} + containers: + - name: topology-gc + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: "{{ .Values.image.pullPolicy }}" + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: + - "nfd-topology-gc" + args: + {{- if .Values.topologyGC.interval | empty | not }} + - "-gc-interval={{ .Values.topologyGC.interval }}" + {{- end }} + resources: + {{- toYaml .Values.topologyGC.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.topologyGC.securityContext | nindent 12 }} + + {{- with .Values.topologyGC.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologyGC.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologyGC.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater-crds.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater-crds.yaml new file mode 100644 index 00000000..b6b91968 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater-crds.yaml @@ -0,0 +1,278 @@ +{{- if and .Values.topologyUpdater.enable .Values.topologyUpdater.createCRDs -}} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1870 + controller-gen.kubebuilder.io/version: v0.11.2 + creationTimestamp: null + name: noderesourcetopologies.topology.node.k8s.io +spec: + group: topology.node.k8s.io + names: + kind: NodeResourceTopology + listKind: NodeResourceTopologyList + plural: noderesourcetopologies + shortNames: + - node-res-topo + singular: noderesourcetopology + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: NodeResourceTopology describes node resources and their topology. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + topologyPolicies: + items: + type: string + type: array + zones: + description: ZoneList contains an array of Zone objects. + items: + description: Zone represents a resource topology zone, e.g. socket, + node, die or core. + properties: + attributes: + description: AttributeList contains an array of AttributeInfo objects. + items: + description: AttributeInfo contains one attribute of a Zone. + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + costs: + description: CostList contains an array of CostInfo objects. + items: + description: CostInfo describes the cost (or distance) between + two Zones. + properties: + name: + type: string + value: + format: int64 + type: integer + required: + - name + - value + type: object + type: array + name: + type: string + parent: + type: string + resources: + description: ResourceInfoList contains an array of ResourceInfo + objects. + items: + description: ResourceInfo contains information about one resource + type. + properties: + allocatable: + anyOf: + - type: integer + - type: string + description: Allocatable quantity of the resource, corresponding + to allocatable in node status, i.e. total amount of this + resource available to be used by pods. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + available: + anyOf: + - type: integer + - type: string + description: Available is the amount of this resource currently + available for new (to be scheduled) pods, i.e. Allocatable + minus the resources reserved by currently running pods. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + capacity: + anyOf: + - type: integer + - type: string + description: Capacity of the resource, corresponding to capacity + in node status, i.e. total amount of this resource that + the node has. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: Name of the resource. + type: string + required: + - allocatable + - available + - capacity + - name + type: object + type: array + type: + type: string + required: + - name + - type + type: object + type: array + required: + - topologyPolicies + - zones + type: object + served: true + storage: false + - name: v1alpha2 + schema: + openAPIV3Schema: + description: NodeResourceTopology describes node resources and their topology. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + attributes: + description: AttributeList contains an array of AttributeInfo objects. + items: + description: AttributeInfo contains one attribute of a Zone. + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + topologyPolicies: + description: 'DEPRECATED (to be removed in v1beta1): use top level attributes + if needed' + items: + type: string + type: array + zones: + description: ZoneList contains an array of Zone objects. + items: + description: Zone represents a resource topology zone, e.g. socket, + node, die or core. + properties: + attributes: + description: AttributeList contains an array of AttributeInfo objects. + items: + description: AttributeInfo contains one attribute of a Zone. + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + costs: + description: CostList contains an array of CostInfo objects. + items: + description: CostInfo describes the cost (or distance) between + two Zones. + properties: + name: + type: string + value: + format: int64 + type: integer + required: + - name + - value + type: object + type: array + name: + type: string + parent: + type: string + resources: + description: ResourceInfoList contains an array of ResourceInfo + objects. + items: + description: ResourceInfo contains information about one resource + type. + properties: + allocatable: + anyOf: + - type: integer + - type: string + description: Allocatable quantity of the resource, corresponding + to allocatable in node status, i.e. total amount of this + resource available to be used by pods. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + available: + anyOf: + - type: integer + - type: string + description: Available is the amount of this resource currently + available for new (to be scheduled) pods, i.e. Allocatable + minus the resources reserved by currently running pods. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + capacity: + anyOf: + - type: integer + - type: string + description: Capacity of the resource, corresponding to capacity + in node status, i.e. total amount of this resource that + the node has. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: Name of the resource. + type: string + required: + - allocatable + - available + - capacity + - name + type: object + type: array + type: + type: string + required: + - name + - type + type: object + type: array + required: + - zones + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater.yaml new file mode 100644 index 00000000..cd3fca05 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/topologyupdater.yaml @@ -0,0 +1,142 @@ +{{- if .Values.topologyUpdater.enable -}} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + role: topology-updater +spec: + selector: + matchLabels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }} + role: topology-updater + template: + metadata: + labels: + {{- include "node-feature-discovery.selectorLabels" . 
| nindent 8 }} + role: topology-updater + annotations: + {{- toYaml .Values.topologyUpdater.annotations | nindent 8 }} + spec: + serviceAccountName: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }} + dnsPolicy: ClusterFirstWithHostNet + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.topologyUpdater.podSecurityContext | nindent 8 }} + containers: + - name: topology-updater + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: "{{ .Values.image.pullPolicy }}" + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + command: + - "nfd-topology-updater" + args: + - "-podresources-socket=/host-var/lib/kubelet-podresources/kubelet.sock" + {{- if .Values.topologyUpdater.updateInterval | empty | not }} + - "-sleep-interval={{ .Values.topologyUpdater.updateInterval }}" + {{- else }} + - "-sleep-interval=3s" + {{- end }} + {{- if .Values.topologyUpdater.watchNamespace | empty | not }} + - "-watch-namespace={{ .Values.topologyUpdater.watchNamespace }}" + {{- else }} + - "-watch-namespace=*" + {{- end }} + {{- if .Values.tls.enable }} + - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt" + - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key" + - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt" + {{- end }} + {{- if .Values.topologyUpdater.podSetFingerprint }} + - "-pods-fingerprint" + {{- end }} + {{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }} + - "-kubelet-config-uri=file:///host-var/kubelet-config" + {{- end }} + {{- if .Values.topologyUpdater.kubeletStateDir | empty }} + # Disable kubelet state tracking by giving an empty path + - "-kubelet-state-dir=" + {{- end }} + volumeMounts: + {{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }} + - name: kubelet-config + mountPath: /host-var/kubelet-config + 
{{- end }} + - name: kubelet-podresources-sock + mountPath: /host-var/lib/kubelet-podresources/kubelet.sock + - name: host-sys + mountPath: /host-sys + {{- if .Values.topologyUpdater.kubeletStateDir | empty | not }} + - name: kubelet-state-files + mountPath: /host-var/lib/kubelet + readOnly: true + {{- end }} + {{- if .Values.tls.enable }} + - name: nfd-topology-updater-cert + mountPath: "/etc/kubernetes/node-feature-discovery/certs" + readOnly: true + {{- end }} + - name: nfd-topology-updater-conf + mountPath: "/etc/kubernetes/node-feature-discovery" + readOnly: true + + resources: + {{- toYaml .Values.topologyUpdater.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.topologyUpdater.securityContext | nindent 12 }} + volumes: + - name: host-sys + hostPath: + path: "/sys" + {{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }} + - name: kubelet-config + hostPath: + path: {{ .Values.topologyUpdater.kubeletConfigPath }} + {{- end }} + - name: kubelet-podresources-sock + hostPath: + {{- if .Values.topologyUpdater.kubeletPodResourcesSockPath | empty | not }} + path: {{ .Values.topologyUpdater.kubeletPodResourcesSockPath }} + {{- else }} + path: /var/lib/kubelet/pod-resources/kubelet.sock + {{- end }} + {{- if .Values.topologyUpdater.kubeletStateDir | empty | not }} + - name: kubelet-state-files + hostPath: + path: {{ .Values.topologyUpdater.kubeletStateDir }} + {{- end }} + - name: nfd-topology-updater-conf + configMap: + name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf + items: + - key: nfd-topology-updater.conf + path: nfd-topology-updater.conf + {{- if .Values.tls.enable }} + - name: nfd-topology-updater-cert + secret: + secretName: nfd-topology-updater-cert + {{- end }} + + + {{- with .Values.topologyUpdater.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologyUpdater.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.topologyUpdater.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/templates/worker.yaml b/deployment/network-operator/charts/node-feature-discovery/templates/worker.yaml new file mode 100644 index 00000000..c1240bdc --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/templates/worker.yaml @@ -0,0 +1,144 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "node-feature-discovery.fullname" . }}-worker + namespace: {{ include "node-feature-discovery.namespace" . }} + labels: + {{- include "node-feature-discovery.labels" . | nindent 4 }} + role: worker + annotations: + {{- toYaml .Values.worker.daemonsetAnnotations | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }} + role: worker + template: + metadata: + labels: + {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }} + role: worker + annotations: + {{- toYaml .Values.worker.annotations | nindent 8 }} + spec: + dnsPolicy: ClusterFirstWithHostNet + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "node-feature-discovery.worker.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.worker.podSecurityContext | nindent 8 }} + containers: + - name: worker + securityContext: + {{- toYaml .Values.worker.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + resources: + {{- toYaml .Values.worker.resources | nindent 12 }} + command: + - "nfd-worker" + args: + - "-server={{ include "node-feature-discovery.fullname" . 
}}-master:{{ .Values.master.service.port }}" + {{- if .Values.enableNodeFeatureApi }} + - "-enable-nodefeature-api" + {{- end }} +{{- if .Values.tls.enable }} + - "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt" + - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key" + - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt" +{{- end }} + volumeMounts: + - name: host-boot + mountPath: "/host-boot" + readOnly: true + - name: host-os-release + mountPath: "/host-etc/os-release" + readOnly: true + - name: host-sys + mountPath: "/host-sys" + readOnly: true + - name: host-usr-lib + mountPath: "/host-usr/lib" + readOnly: true + - name: host-lib + mountPath: "/host-lib" + readOnly: true + {{- if .Values.worker.mountUsrSrc }} + - name: host-usr-src + mountPath: "/host-usr/src" + readOnly: true + {{- end }} + - name: source-d + mountPath: "/etc/kubernetes/node-feature-discovery/source.d/" + readOnly: true + - name: features-d + mountPath: "/etc/kubernetes/node-feature-discovery/features.d/" + readOnly: true + - name: nfd-worker-conf + mountPath: "/etc/kubernetes/node-feature-discovery" + readOnly: true +{{- if .Values.tls.enable }} + - name: nfd-worker-cert + mountPath: "/etc/kubernetes/node-feature-discovery/certs" + readOnly: true +{{- end }} + volumes: + - name: host-boot + hostPath: + path: "/boot" + - name: host-os-release + hostPath: + path: "/etc/os-release" + - name: host-sys + hostPath: + path: "/sys" + - name: host-usr-lib + hostPath: + path: "/usr/lib" + - name: host-lib + hostPath: + path: "/lib" + {{- if .Values.worker.mountUsrSrc }} + - name: host-usr-src + hostPath: + path: "/usr/src" + {{- end }} + - name: source-d + hostPath: + path: "/etc/kubernetes/node-feature-discovery/source.d/" + - name: features-d + hostPath: + path: "/etc/kubernetes/node-feature-discovery/features.d/" + - name: nfd-worker-conf + configMap: + name: {{ include "node-feature-discovery.fullname" . 
}}-worker-conf + items: + - key: nfd-worker.conf + path: nfd-worker.conf +{{- if .Values.tls.enable }} + - name: nfd-worker-cert + secret: + secretName: nfd-worker-cert +{{- end }} + {{- with .Values.worker.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.worker.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.worker.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.worker.priorityClassName }} + priorityClassName: {{ . | quote }} + {{- end }} diff --git a/deployment/network-operator/charts/node-feature-discovery/values.yaml b/deployment/network-operator/charts/node-feature-discovery/values.yaml new file mode 100644 index 00000000..d3db4355 --- /dev/null +++ b/deployment/network-operator/charts/node-feature-discovery/values.yaml @@ -0,0 +1,484 @@ +image: + repository: registry.k8s.io/nfd/node-feature-discovery + # This should be set to 'IfNotPresent' for released version + pullPolicy: IfNotPresent + # tag, if defined will use the given image tag, else Chart.AppVersion will be used + # tag +imagePullSecrets: [] + +nameOverride: "" +fullnameOverride: "" +namespaceOverride: "" + +enableNodeFeatureApi: false + +master: + config: ### + # noPublish: false + # extraLabelNs: ["added.ns.io","added.kubernets.io"] + # denyLabelNs: ["denied.ns.io","denied.kubernetes.io"] + # resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"] + # enableTaints: false + # labelWhiteList: "foo" + ### + # The TCP port that nfd-master listens for incoming requests. 
Default: 8080 + port: 8080 + instance: + featureApi: + denyLabelNs: [] + extraLabelNs: [] + resourceLabels: [] + enableTaints: false + crdController: null + featureRulesController: null + deploymentAnnotations: {} + replicaCount: 1 + + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: [ "ALL" ] + readOnlyRootFilesystem: true + runAsNonRoot: true + # runAsUser: 1000 + + serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: + + rbac: + create: true + + service: + type: ClusterIP + port: 8080 + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + nodeSelector: {} + + tolerations: + - key: "node-role.kubernetes.io/master" + operator: "Equal" + value: "" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/control-plane" + operator: "Equal" + value: "" + effect: "NoSchedule" + + annotations: {} + + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/master" + operator: In + values: [""] + - weight: 1 + preference: + matchExpressions: + - key: "node-role.kubernetes.io/control-plane" + operator: In + values: [""] + +worker: + config: ### + #core: + # labelWhiteList: + # noPublish: false + # sleepInterval: 60s + # featureSources: [all] + # labelSources: [all] + # klog: + # addDirHeader: false + # alsologtostderr: false + # logBacktraceAt: + # logtostderr: true + # skipHeaders: false + # stderrthreshold: 2 + # v: 0 + # vmodule: + ## NOTE: the following options are not dynamically run-time configurable + ## and require a nfd-worker restart to take effect after being changed + # logDir: + # logFile: + # logFileMaxSize: 1800 + # skipLogHeaders: false + #sources: + # cpu: + # cpuid: + ## NOTE: whitelist has priority over blacklist + # attributeBlacklist: + # - "BMI1" + # - "BMI2" + # - "CLMUL" + # - "CMOV" + # - "CX16" + # - "ERMS" + # - "F16C" + # - "HTT" + # - "LZCNT" + # - "MMX" + # - "MMXEXT" + # - "NX" + # - "POPCNT" + # - "RDRAND" + # - "RDSEED" + # - "RDTSCP" + # - "SGX" + # - "SSE" + # - "SSE2" + # - "SSE3" + # - "SSE4" + # - "SSE42" + # - "SSSE3" + # attributeWhitelist: + # kernel: + # kconfigFile: "/path/to/kconfig" + # configOpts: + # - "NO_HZ" + # - "X86" + # - "DMI" + # pci: + # deviceClassWhitelist: + # - "0200" + # - "03" + # - "12" + # deviceLabelFields: + # - "class" + # - "vendor" + # - "device" + # - "subsystem_vendor" + # - "subsystem_device" + # usb: + # deviceClassWhitelist: + # - "0e" + # - 
"ef" + # - "fe" + # - "ff" + # deviceLabelFields: + # - "class" + # - "vendor" + # - "device" + # local: + # hooksEnabled: true + # custom: + # # The following feature demonstrates the capabilities of the matchFeatures + # - name: "my custom rule" + # labels: + # my-ng-feature: "true" + # # matchFeatures implements a logical AND over all matcher terms in the + # # list (i.e. all of the terms, or per-feature matchers, must match) + # matchFeatures: + # - feature: cpu.cpuid + # matchExpressions: + # AVX512F: {op: Exists} + # - feature: cpu.cstate + # matchExpressions: + # enabled: {op: IsTrue} + # - feature: cpu.pstate + # matchExpressions: + # no_turbo: {op: IsFalse} + # scaling_governor: {op: In, value: ["performance"]} + # - feature: cpu.rdt + # matchExpressions: + # RDTL3CA: {op: Exists} + # - feature: cpu.sst + # matchExpressions: + # bf.enabled: {op: IsTrue} + # - feature: cpu.topology + # matchExpressions: + # hardware_multithreading: {op: IsFalse} + # + # - feature: kernel.config + # matchExpressions: + # X86: {op: Exists} + # LSM: {op: InRegexp, value: ["apparmor"]} + # - feature: kernel.loadedmodule + # matchExpressions: + # e1000e: {op: Exists} + # - feature: kernel.selinux + # matchExpressions: + # enabled: {op: IsFalse} + # - feature: kernel.version + # matchExpressions: + # major: {op: In, value: ["5"]} + # minor: {op: Gt, value: ["10"]} + # + # - feature: storage.block + # matchExpressions: + # rotational: {op: In, value: ["0"]} + # dax: {op: In, value: ["0"]} + # + # - feature: network.device + # matchExpressions: + # operstate: {op: In, value: ["up"]} + # speed: {op: Gt, value: ["100"]} + # + # - feature: memory.numa + # matchExpressions: + # node_count: {op: Gt, value: ["2"]} + # - feature: memory.nv + # matchExpressions: + # devtype: {op: In, value: ["nd_dax"]} + # mode: {op: In, value: ["memory"]} + # + # - feature: system.osrelease + # matchExpressions: + # ID: {op: In, value: ["fedora", "centos"]} + # - feature: system.name + # matchExpressions: 
+ # nodename: {op: InRegexp, value: ["^worker-X"]} + # + # - feature: local.label + # matchExpressions: + # custom-feature-knob: {op: Gt, value: ["100"]} + # + # # The following feature demonstrates the capabilities of the matchAny + # - name: "my matchAny rule" + # labels: + # my-ng-feature-2: "my-value" + # # matchAny implements a logical OR over all elements (sub-matchers) in + # # the list (i.e. at least one feature matcher must match) + # matchAny: + # - matchFeatures: + # - feature: kernel.loadedmodule + # matchExpressions: + # driver-module-X: {op: Exists} + # - feature: pci.device + # matchExpressions: + # vendor: {op: In, value: ["8086"]} + # class: {op: In, value: ["0200"]} + # - matchFeatures: + # - feature: kernel.loadedmodule + # matchExpressions: + # driver-module-Y: {op: Exists} + # - feature: usb.device + # matchExpressions: + # vendor: {op: In, value: ["8086"]} + # class: {op: In, value: ["02"]} + # + # # The following features demonstrate label templating capabilities + # - name: "my template rule" + # labelsTemplate: | + # {{ range .system.osrelease }}my-system-feature.{{ .Name }}={{ .Value }} + # {{ end }} + # matchFeatures: + # - feature: system.osrelease + # matchExpressions: + # ID: {op: InRegexp, value: ["^open.*"]} + # VERSION_ID.major: {op: In, value: ["13", "15"]} + # + # - name: "my template rule 2" + # labelsTemplate: | + # {{ range .pci.device }}my-pci-device.{{ .class }}-{{ .device }}=with-cpuid + # {{ end }} + # matchFeatures: + # - feature: pci.device + # matchExpressions: + # class: {op: InRegexp, value: ["^06"]} + # vendor: ["8086"] + # - feature: cpu.cpuid + # matchExpressions: + # AVX: {op: Exists} + # + # # The following examples demonstrate vars field and back-referencing + # # previous labels and vars + # - name: "my dummy kernel rule" + # labels: + # "my.kernel.feature": "true" + # matchFeatures: + # - feature: kernel.version + # matchExpressions: + # major: {op: Gt, value: ["2"]} + # + # - name: "my dummy rule with no 
labels" + # vars: + # "my.dummy.var": "1" + # matchFeatures: + # - feature: cpu.cpuid + # matchExpressions: {} + # + # - name: "my rule using backrefs" + # labels: + # "my.backref.feature": "true" + # matchFeatures: + # - feature: rule.matched + # matchExpressions: + # my.kernel.feature: {op: IsTrue} + # my.dummy.var: {op: Gt, value: ["0"]} + # +### + + daemonsetAnnotations: {} + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: [ "ALL" ] + readOnlyRootFilesystem: true + runAsNonRoot: true + # runAsUser: 1000 + + serviceAccount: + # Specifies whether a service account should be created. + # We create this by default to make it easier for downstream users to apply PodSecurityPolicies. + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: + + rbac: + create: true + + # Allow users to mount the hostPath /usr/src, useful for RHCOS on s390x + # Does not work on systems without /usr/src AND a read-only /usr, such as Talos + mountUsrSrc: false + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + nodeSelector: {} + + tolerations: [] + + annotations: {} + + affinity: {} + + priorityClassName: "" + +topologyUpdater: + config: ### + ## key = node name, value = list of resources to be excluded. + ## use * to exclude from all nodes. 
+ ## an example of how the exclude list should look + #excludeList: + # node1: [cpu] + # node2: [memory, example/deviceA] + # '*': [hugepages-2Mi] +### + + enable: false + createCRDs: false + + serviceAccount: + create: true + annotations: {} + name: + rbac: + create: true + + kubeletConfigPath: + kubeletPodResourcesSockPath: + updateInterval: 60s + watchNamespace: "*" + kubeletStateDir: /var/lib/kubelet + + podSecurityContext: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: [ "ALL" ] + readOnlyRootFilesystem: true + runAsUser: 0 + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + nodeSelector: {} + tolerations: [] + annotations: {} + affinity: {} + podSetFingerprint: true + +topologyGC: + enable: true + replicaCount: 1 + + serviceAccount: + create: true + annotations: {} + name: + rbac: + create: true + + interval: 1h + + podSecurityContext: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: [ "ALL" ] + readOnlyRootFilesystem: true + runAsNonRoot: true + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + nodeSelector: {} + tolerations: [] + annotations: {} + affinity: {} + +# Optionally use encryption for worker <--> master comms +# TODO: verify hostname is not yet supported +# +# If you do not enable certManager (and have it installed) you will +# need to manually, or otherwise, provision the TLS certs as secrets +tls: + enable: false + certManager: false diff --git a/deployment/network-operator/templates/upgrade-crd.yaml b/deployment/network-operator/templates/upgrade-crd.yaml index b935070d..64096060 100644 --- a/deployment/network-operator/templates/upgrade-crd.yaml +++ b/deployment/network-operator/templates/upgrade-crd.yaml @@ -76,5 +76,8 @@ spec: {{- if .Values.sriovNetworkOperator.enabled }} kubectl apply -f /crds/sriov-network-operator; {{- end }} + {{- if .Values.nfd.enabled }} + if ! kubectl get crd | grep -E "nodefeaturerules\.nfd\.k8s-sigs\.io|nodefeatures\.nfd\.k8s-sigs\.io" ; then kubectl apply -f /crds/node-feature-discovery; fi + {{- end }} restartPolicy: OnFailure {{- end }} diff --git a/deployment/network-operator/values.yaml b/deployment/network-operator/values.yaml index 0cb19138..3ede38b1 100644 --- a/deployment/network-operator/values.yaml +++ b/deployment/network-operator/values.yaml @@ -18,6 +18,7 @@ nfd: enabled: true + nodeFeatureRules: false psp: enabled: false @@ -32,28 +33,37 @@ sriovNetworkOperator: # Node Feature discovery chart related values node-feature-discovery: - image: - pullPolicy: IfNotPresent - nodeFeatureRule: - createCRD: false - master: - instance: "nvidia.networking" + enableNodeFeatureApi: true worker: + serviceAccount: + name: node-feature-discovery + # disable creation to avoid duplicate serviceaccount creation by master spec below + create: false tolerations: - - key: "nvidia.com/gpu" - operator: "Equal" - value: "present" - effect: "NoSchedule" + - key: "node-role.kubernetes.io/master" + operator: "Equal" + value: "" + 
effect: "NoSchedule" + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule config: sources: pci: deviceClassWhitelist: - - "02" - - "0200" - - "0207" + - "02" + - "0200" + - "0207" + - "0300" + - "0302" deviceLabelFields: - - vendor - + - vendor + master: + serviceAccount: + name: node-feature-discovery + create: true + config: + extraLabelNs: ["nvidia.com"] # SR-IOV Network Operator chart related values sriov-network-operator: diff --git a/hack/templates/values/values.template b/hack/templates/values/values.template index 21f3328d..bada7834 100644 --- a/hack/templates/values/values.template +++ b/hack/templates/values/values.template @@ -18,6 +18,7 @@ nfd: enabled: true + nodeFeatureRules: false psp: enabled: false @@ -32,28 +33,37 @@ sriovNetworkOperator: # Node Feature discovery chart related values node-feature-discovery: - image: - pullPolicy: IfNotPresent - nodeFeatureRule: - createCRD: false - master: - instance: "nvidia.networking" + enableNodeFeatureApi: true worker: + serviceAccount: + name: node-feature-discovery + # disable creation to avoid duplicate serviceaccount creation by master spec below + create: false tolerations: - - key: "nvidia.com/gpu" - operator: "Equal" - value: "present" - effect: "NoSchedule" + - key: "node-role.kubernetes.io/master" + operator: "Equal" + value: "" + effect: "NoSchedule" + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule config: sources: pci: deviceClassWhitelist: - - "02" - - "0200" - - "0207" + - "02" + - "0200" + - "0207" + - "0300" + - "0302" deviceLabelFields: - - vendor - + - vendor + master: + serviceAccount: + name: node-feature-discovery + create: true + config: + extraLabelNs: ["nvidia.com"] # SR-IOV Network Operator chart related values sriov-network-operator: From ca22320fcc958595c2731616ba961b28c40fa636 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Mon, 26 Jun 2023 11:30:29 +0200 Subject: [PATCH 3/3] Address reviewer requested changes Signed-off-by: Carlos 
Eduardo Arango Gutierrez --- deployment/network-operator/templates/upgrade-crd.yaml | 2 +- deployment/network-operator/values.yaml | 10 ++++++---- hack/templates/values/values.template | 10 ++++++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/deployment/network-operator/templates/upgrade-crd.yaml b/deployment/network-operator/templates/upgrade-crd.yaml index 64096060..d94114b6 100644 --- a/deployment/network-operator/templates/upgrade-crd.yaml +++ b/deployment/network-operator/templates/upgrade-crd.yaml @@ -77,7 +77,7 @@ spec: kubectl apply -f /crds/sriov-network-operator; {{- end }} {{- if .Values.nfd.enabled }} - if ! kubectl get crd | grep -E "nodefeaturerules\.nfd\.k8s-sigs\.io|nodefeatures\.nfd\.k8s-sigs\.io" ; then kubectl apply -f /crds/node-feature-discovery; fi + kubectl apply -f /crds/node-feature-discovery; {{- end }} restartPolicy: OnFailure {{- end }} diff --git a/deployment/network-operator/values.yaml b/deployment/network-operator/values.yaml index 3ede38b1..00a34288 100644 --- a/deployment/network-operator/values.yaml +++ b/deployment/network-operator/values.yaml @@ -18,7 +18,6 @@ nfd: enabled: true - nodeFeatureRules: false psp: enabled: false @@ -33,7 +32,7 @@ sriovNetworkOperator: # Node Feature discovery chart related values node-feature-discovery: - enableNodeFeatureApi: true + enableNodeFeatureApi: false worker: serviceAccount: name: node-feature-discovery @@ -41,8 +40,10 @@ node-feature-discovery: create: false tolerations: - key: "node-role.kubernetes.io/master" - operator: "Equal" - value: "" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" effect: "NoSchedule" - key: nvidia.com/gpu operator: Exists @@ -59,6 +60,7 @@ node-feature-discovery: deviceLabelFields: - vendor master: + instance: "nvidia.networking" serviceAccount: name: node-feature-discovery create: true diff --git a/hack/templates/values/values.template b/hack/templates/values/values.template 
index bada7834..3614459b 100644 --- a/hack/templates/values/values.template +++ b/hack/templates/values/values.template @@ -18,7 +18,6 @@ nfd: enabled: true - nodeFeatureRules: false psp: enabled: false @@ -33,7 +32,7 @@ sriovNetworkOperator: # Node Feature discovery chart related values node-feature-discovery: - enableNodeFeatureApi: true + enableNodeFeatureApi: false worker: serviceAccount: name: node-feature-discovery @@ -41,8 +40,10 @@ node-feature-discovery: create: false tolerations: - key: "node-role.kubernetes.io/master" - operator: "Equal" - value: "" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" effect: "NoSchedule" - key: nvidia.com/gpu operator: Exists @@ -59,6 +60,7 @@ node-feature-discovery: deviceLabelFields: - vendor master: + instance: "nvidia.networking" serviceAccount: name: node-feature-discovery create: true