diff --git a/.spelling b/.spelling index 507e9340a9a2..5f01c0594b50 100644 --- a/.spelling +++ b/.spelling @@ -148,6 +148,7 @@ k8s-jobs kube kubelet kubernetes +liveness localhost memoization memoized diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 4f18ce9ace84..3d38c2f0c544 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -33,12 +33,14 @@ most users. Environment variables may be removed at any time. | `GZIP_IMPLEMENTATION` | `string` | `PGZip` | The implementation of compression/decompression. Currently only "`PGZip`" and "`GZip`" are supported. | | `INFORMER_WRITE_BACK` | `bool` | `true` | Whether to write back to informer instead of catching up. | | `HEALTHZ_AGE` | `time.Duration` | `5m` | How old a un-reconciled workflow is to report unhealthy. | +| `HEALTHZ_LIST_LIMIT` | `int` | `200` | The maximum number of responses to return for a list call on workflows for the liveness check. | | `INDEX_WORKFLOW_SEMAPHORE_KEYS` | `bool` | `true` | Whether or not to index semaphores. | | `LEADER_ELECTION_IDENTITY` | `string` | Controller's `metadata.name` | The ID used for workflow controllers to elect a leader. | | `LEADER_ELECTION_DISABLE` | `bool` | `false` | Whether leader election should be disabled. | | `LEADER_ELECTION_LEASE_DURATION` | `time.Duration` | `15s` | The duration that non-leader candidates will wait to force acquire leadership. | | `LEADER_ELECTION_RENEW_DEADLINE` | `time.Duration` | `10s` | The duration that the acting master will retry refreshing leadership before giving up. | | `LEADER_ELECTION_RETRY_PERIOD` | `time.Duration` | `5s` | The duration that the leader election clients should wait between tries of actions. | +| `LIST_LIMIT` | `int` | `200` | The maximum number of responses to return for a list call on workflows for the workflow informer. 
| | `MAX_OPERATION_TIME` | `time.Duration` | `30s` | The maximum time a workflow operation is allowed to run for before re-queuing the workflow onto the work queue. | | `OFFLOAD_NODE_STATUS_TTL` | `time.Duration` | `5m` | The TTL to delete the offloaded node status. Currently only used for testing. | | `POD_NAMES` | `string` | `v2` | Whether to have pod names contain the template name (v2) or be the node id (v1) - should be set the same for Argo Server. | diff --git a/workflow/controller/controller.go b/workflow/controller/controller.go index 1902c0250ceb..bb206b67ad4a 100644 --- a/workflow/controller/controller.go +++ b/workflow/controller/controller.go @@ -803,6 +803,7 @@ func (wfc *WorkflowController) tweakListOptions(options *metav1.ListOptions) { labelSelector := labels.NewSelector(). Add(util.InstanceIDRequirement(wfc.Config.InstanceID)) options.LabelSelector = labelSelector.String() + options.Limit = int64(env.LookupEnvIntOr("LIST_LIMIT", 200)) } func getWfPriority(obj interface{}) (int32, time.Time) { diff --git a/workflow/controller/healthz.go b/workflow/controller/healthz.go index 9e9bded526cd..033750450c9e 100644 --- a/workflow/controller/healthz.go +++ b/workflow/controller/healthz.go @@ -13,7 +13,8 @@ import ( ) var ( - age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute) + age = env.LookupEnvDurationOr("HEALTHZ_AGE", 5*time.Minute) + limit = int64(env.LookupEnvIntOr("HEALTHZ_LIST_LIMIT", 200)) ) // https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request @@ -30,7 +31,7 @@ func (wfc *WorkflowController) Healthz(w http.ResponseWriter, r *http.Request) { labelSelector := "!" 
+ common.LabelKeyPhase + "," + instanceIDSelector err := func() error { // avoid problems with informers, but directly querying the API - list, err := wfc.wfclientset.ArgoprojV1alpha1().Workflows(wfc.managedNamespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + list, err := wfc.wfclientset.ArgoprojV1alpha1().Workflows(wfc.managedNamespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector, Limit: limit}) if err != nil { return err }