From 92f44fd6193c58506273673111ac3e3de4d373cc Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Tue, 9 May 2023 13:39:55 +0300 Subject: [PATCH] Add support for leader election Leader election is enabled by default in the deployment yaml even though the controller's replica count is set to 1, so that the IPAM controller does not misbehave if the user decides to scale the deployment. Signed-off-by: Yury Kulazhenkov --- README.md | 38 +++++++++++++--------- cmd/ipam-controller/app/app.go | 1 + cmd/ipam-controller/app/options/options.go | 27 ++++++++------- deploy/nv-ipam.yaml | 30 +++++++++++++++++ 4 files changed, 69 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 3523517..a42cc5a 100644 --- a/README.md +++ b/README.md @@ -109,43 +109,49 @@ ipam-controller accepts configuration using command line flags and K8s configMap ```text Logging flags: - --log-flush-frequency duration + --log-flush-frequency duration Maximum number of seconds between log flushes (default 5s) - --log-json-info-buffer-size quantity - [Alpha] In JSON format with split output streams, the info messages can be buffered for a while to increase performance. The default value of zero bytes disables buffering. The size can be specified as - number of bytes (512), multiples of 1000 (1K), multiples of 1024 (2Ki), or powers of those (3M, 4G, 5Mi, 6Gi). Enable the LoggingAlphaOptions feature gate to use this. - --log-json-split-stream - [Alpha] In JSON format, write error messages to stderr and info messages to stdout. The default is to write a single stream to stdout. Enable the LoggingAlphaOptions feature gate to use this. - --logging-format string + --log-json-info-buffer-size quantity + [Alpha] In JSON format with split output streams, the info messages can be buffered for a while to increase performance. The default value of zero bytes disables buffering. 
The + size can be specified as number of bytes (512), multiples of 1000 (1K), multiples of 1024 (2Ki), or powers of those (3M, 4G, 5Mi, 6Gi). Enable the LoggingAlphaOptions feature + gate to use this. + --log-json-split-stream + [Alpha] In JSON format, write error messages to stderr and info messages to stdout. The default is to write a single stream to stdout. Enable the LoggingAlphaOptions feature gate + to use this. + --logging-format string Sets the log format. Permitted formats: "json" (gated by LoggingBetaOptions), "text". (default "text") - -v, --v Level + -v, --v Level number for the log level verbosity - --vmodule pattern=N,... + --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) Common flags: - --feature-gates mapStringBool + --feature-gates mapStringBool A set of key=value pairs that describe feature gates for alpha/experimental features. Options are: AllAlpha=true|false (ALPHA - default=false) AllBeta=true|false (BETA - default=false) ContextualLogging=true|false (ALPHA - default=false) LoggingAlphaOptions=true|false (ALPHA - default=false) LoggingBetaOptions=true|false (BETA - default=true) - --version + --version print binary version and exit Controller flags: - --config-name string + --config-name string The name of the ConfigMap which holds controller configuration (default "nvidia-k8s-ipam-config") - --config-namespace string + --config-namespace string The name of the namespace where ConfigMap with controller configuration exist (default "kube-system") - --health-probe-bind-address string + --health-probe-bind-address string The address the probe endpoint binds to. (default ":8081") - --kubeconfig string + --kubeconfig string Paths to a kubeconfig. Only required if out-of-cluster. - --metrics-bind-address string + --leader-elect + Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager. 
+ --leader-elect-namespace string + Determines the namespace in which the leader election resource will be created. (default "kube-system") + --metrics-bind-address string The address the metric endpoint binds to. (default ":8080") ``` diff --git a/cmd/ipam-controller/app/app.go b/cmd/ipam-controller/app/app.go index 8f0fa66..b659627 100644 --- a/cmd/ipam-controller/app/app.go +++ b/cmd/ipam-controller/app/app.go @@ -121,6 +121,7 @@ func RunController(ctx context.Context, config *rest.Config, opts *options.Optio Port: 9443, HealthProbeBindAddress: opts.ProbeAddr, LeaderElection: opts.EnableLeaderElection, + LeaderElectionNamespace: opts.LeaderElectionNamespace, LeaderElectionID: "dd1643cf.nvidia.com", LeaderElectionReleaseOnCancel: true, }) diff --git a/cmd/ipam-controller/app/options/options.go b/cmd/ipam-controller/app/options/options.go index 9875b71..1e323a0 100644 --- a/cmd/ipam-controller/app/options/options.go +++ b/cmd/ipam-controller/app/options/options.go @@ -25,23 +25,25 @@ import ( // New initialize and return new Options object func New() *Options { return &Options{ - Options: *cmdoptions.New(), - MetricsAddr: ":8080", - ProbeAddr: ":8081", - EnableLeaderElection: false, - ConfigMapName: "nvidia-k8s-ipam-config", - ConfigMapNamespace: "kube-system", + Options: *cmdoptions.New(), + MetricsAddr: ":8080", + ProbeAddr: ":8081", + EnableLeaderElection: false, + LeaderElectionNamespace: "kube-system", + ConfigMapName: "nvidia-k8s-ipam-config", + ConfigMapNamespace: "kube-system", } } // Options holds command line options for controller type Options struct { cmdoptions.Options - MetricsAddr string - ProbeAddr string - EnableLeaderElection bool - ConfigMapName string - ConfigMapNamespace string + MetricsAddr string + ProbeAddr string + EnableLeaderElection bool + LeaderElectionNamespace string + ConfigMapName string + ConfigMapNamespace string } // AddNamedFlagSets register flags for common options in NamedFlagSets @@ -61,6 +63,9 @@ func (o *Options) 
AddNamedFlagSets(sharedFS *cliflag.NamedFlagSets) { controllerFS.BoolVar(&o.EnableLeaderElection, "leader-elect", o.EnableLeaderElection, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") + controllerFS.StringVar(&o.LeaderElectionNamespace, "leader-elect-namespace", o.LeaderElectionNamespace, + "Determines the namespace in which the leader "+ + "election resource will be created.") controllerFS.StringVar(&o.ConfigMapName, "config-name", o.ConfigMapName, "The name of the ConfigMap which holds controller configuration") controllerFS.StringVar(&o.ConfigMapNamespace, "config-namespace", diff --git a/deploy/nv-ipam.yaml b/deploy/nv-ipam.yaml index 7378244..b6a8373 100644 --- a/deploy/nv-ipam.yaml +++ b/deploy/nv-ipam.yaml @@ -132,6 +132,25 @@ rules: - get - list - watch + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 @@ -175,6 +194,15 @@ spec: priorityClassName: system-cluster-critical serviceAccountName: nv-ipam-controller affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: name + operator: In + values: + - nv-ipam-controller + topologyKey: "kubernetes.io/hostname" nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: - weight: 1 @@ -209,6 +237,8 @@ spec: args: - --config-name=nvidia-k8s-ipam-config - --config-namespace=$(POD_NAMESPACE) + - --leader-elect=true + - --leader-elect-namespace=$(POD_NAMESPACE) env: - name: POD_NAMESPACE valueFrom: