From 63eb17cd159801b9287429cf45096da42bb7dce1 Mon Sep 17 00:00:00 2001 From: keisku Date: Thu, 25 Apr 2024 11:13:03 +0900 Subject: [PATCH] add reason tag, BackoffLimitExceeded, DeadlineExceeded --- .../ksm/kubernetes_state_transformers.go | 10 ++-- .../ksm/kubernetes_state_transformers_test.go | 56 ++++++++++++++++--- .../kubelet/provider/pod/provider.go | 8 ++- 3 files changed, 62 insertions(+), 12 deletions(-) diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers.go index bfaa00e4dd8c4e..6b48ede738652a 100644 --- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers.go +++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers.go @@ -273,9 +273,11 @@ func containerWaitingReasonTransformer(s sender.Sender, name string, metric ksms } var allowedTerminatedReasons = map[string]struct{}{ - "oomkilled": {}, - "containercannotrun": {}, - "error": {}, + "oomkilled": {}, + "containercannotrun": {}, + "error": {}, + "deadlineexceeded": {}, + "backofflimitexceeded": {}, } // containerTerminatedReasonTransformer validates the container waiting reasons for metric kube_pod_container_status_terminated_reason @@ -426,7 +428,7 @@ func validateJob(val float64, tags []string) ([]string, bool) { kubeCronjob := "" for _, tag := range tags { split := strings.Split(tag, ":") - if len(split) == 2 && split[0] == "kube_job" || split[0] == "job" || split[0] == "job_name" { + if len(split) == 2 && split[0] == "kube_job" || split[0] == "job" || split[0] == "job_name" || split[0] == "reason" { // Trim the timestamp suffix to avoid high cardinality if name, trimmed := trimJobTag(split[1]); trimmed { // The trimmed job name corresponds to the parent cronjob name diff --git a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers_test.go b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers_test.go index 39251c412419bc..c12a313b8397e1 100644 --- a/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers_test.go +++ b/pkg/collector/corechecks/cluster/ksm/kubernetes_state_transformers_test.go @@ -343,17 +343,17 @@ func Test_jobFailedTransformer(t *testing.T) { "condition": "true", }, }, - tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true"}, + tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true", "reason:backofflimitexceeded"}, }, expectedServiceCheck: &serviceCheck{ name: "kubernetes_state.job.complete", status: servicecheck.ServiceCheckCritical, - tags: []string{"kube_cronjob:foo", "namespace:default"}, + tags: []string{"kube_cronjob:foo", "namespace:default", "reason:backofflimitexceeded"}, }, expectedMetric: &metricsExpected{ name: "kubernetes_state.job.completion.failed", val: 1, - tags: []string{"kube_cronjob:foo", "namespace:default"}, + tags: []string{"kube_cronjob:foo", "namespace:default", "reason:backofflimitexceeded"}, }, }, { @@ -368,17 +368,17 @@ func Test_jobFailedTransformer(t *testing.T) { "condition": "true", }, }, - tags: []string{"job:foo-1509998340", "namespace:default", "condition:true"}, + tags: []string{"job:foo-1509998340", "namespace:default", "condition:true", "reason:deadlineexceeded"}, }, expectedServiceCheck: &serviceCheck{ name: "kubernetes_state.job.complete", status: servicecheck.ServiceCheckCritical, - tags: []string{"kube_cronjob:foo", "namespace:default"}, + tags: []string{"kube_cronjob:foo", "namespace:default", "reason:deadlineexceeded"}, }, expectedMetric: &metricsExpected{ name: "kubernetes_state.job.completion.failed", val: 1, - tags: []string{"kube_cronjob:foo", "namespace:default"}, + tags: []string{"kube_cronjob:foo", "namespace:default", "reason:deadlineexceeded"}, }, }, { @@ -393,7 +393,7 @@ func Test_jobFailedTransformer(t *testing.T) { "condition": "true", }, }, - tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true"}, + tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true", "reason:backofflimitexceeded"}, }, expectedServiceCheck: nil, expectedMetric: nil, @@ -1009,6 +1009,48 @@ func Test_containerTerminatedReasonTransformer(t *testing.T) { tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:Error"}, }, }, + { + name: "BackoffLimitExceeded", + args: args{ + name: "kube_pod_container_status_terminated_reason", + metric: ksmstore.DDMetric{ + Val: 1, + Labels: map[string]string{ + "container": "foo", + "pod": "bar", + "namespace": "default", + "reason": "BackoffLimitExceeded", + }, + }, + tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:BackoffLimitExceeded"}, + }, + expected: &metricsExpected{ + name: "kubernetes_state.container.status_report.count.terminated", + val: 1, + tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:BackoffLimitExceeded"}, + }, + }, + { + name: "DeadlineExceeded", + args: args{ + name: "kube_pod_container_status_terminated_reason", + metric: ksmstore.DDMetric{ + Val: 1, + Labels: map[string]string{ + "container": "foo", + "pod": "bar", + "namespace": "default", + "reason": "DeadlineExceeded", + }, + }, + tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:DeadlineExceeded"}, + }, + expected: &metricsExpected{ + name: "kubernetes_state.container.status_report.count.terminated", + val: 1, + tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:DeadlineExceeded"}, + }, + }, } for _, tt := range tests { s := mocksender.NewMockSender("ksm") diff --git a/pkg/collector/corechecks/containers/kubelet/provider/pod/provider.go b/pkg/collector/corechecks/containers/kubelet/provider/pod/provider.go index 34f24a41a2ac3d..712926b34733ee 100644 --- a/pkg/collector/corechecks/containers/kubelet/provider/pod/provider.go +++ b/pkg/collector/corechecks/containers/kubelet/provider/pod/provider.go @@ -37,7 +37,13 @@ var includeContainerStateReason = map[string][]string{ "invalidimagename", "createcontainerconfigerror", }, - "terminated": {"oomkilled", "containercannotrun", "error"}, + "terminated": { + "oomkilled", + "containercannotrun", + "error", + "deadlineexceeded", + "backofflimitexceeded", + }, } const kubeNamespaceTag = "kube_namespace"