Skip to content

Commit

Permalink
add reason tags to KSM and kubelet metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
keisku committed Apr 27, 2024
1 parent 1dd94e2 commit 1be85ef
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,11 @@ func containerWaitingReasonTransformer(s sender.Sender, name string, metric ksms
}

// allowedTerminatedReasons is the set of container terminated reasons that are
// accepted for the kube_pod_container_status_terminated_reason metric.
//
// Keys are stored in lower case. NOTE(review): the lookup site is not visible
// here; presumably the reason label is lower-cased before being checked
// against this set — confirm against containerTerminatedReasonTransformer.
//
// Each key must appear exactly once: Go rejects duplicate constant keys in a
// map literal at compile time.
var allowedTerminatedReasons = map[string]struct{}{
	"oomkilled":            {},
	"containercannotrun":   {},
	"error":                {},
	"deadlineexceeded":     {},
	"backofflimitexceeded": {},
}

// containerTerminatedReasonTransformer validates the container terminated reasons for metric kube_pod_container_status_terminated_reason
Expand Down Expand Up @@ -426,7 +428,7 @@ func validateJob(val float64, tags []string) ([]string, bool) {
kubeCronjob := ""
for _, tag := range tags {
split := strings.Split(tag, ":")
if len(split) == 2 && split[0] == "kube_job" || split[0] == "job" || split[0] == "job_name" {
if len(split) == 2 && split[0] == "kube_job" || split[0] == "job" || split[0] == "job_name" || split[0] == "reason" {
// Trim the timestamp suffix to avoid high cardinality
if name, trimmed := trimJobTag(split[1]); trimmed {
// The trimmed job name corresponds to the parent cronjob name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,17 +343,17 @@ func Test_jobFailedTransformer(t *testing.T) {
"condition": "true",
},
},
tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true"},
tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true", "reason:backofflimitexceeded"},
},
expectedServiceCheck: &serviceCheck{
name: "kubernetes_state.job.complete",
status: servicecheck.ServiceCheckCritical,
tags: []string{"kube_cronjob:foo", "namespace:default"},
tags: []string{"kube_cronjob:foo", "namespace:default", "reason:backofflimitexceeded"},
},
expectedMetric: &metricsExpected{
name: "kubernetes_state.job.completion.failed",
val: 1,
tags: []string{"kube_cronjob:foo", "namespace:default"},
tags: []string{"kube_cronjob:foo", "namespace:default", "reason:backofflimitexceeded"},
},
},
{
Expand All @@ -368,17 +368,17 @@ func Test_jobFailedTransformer(t *testing.T) {
"condition": "true",
},
},
tags: []string{"job:foo-1509998340", "namespace:default", "condition:true"},
tags: []string{"job:foo-1509998340", "namespace:default", "condition:true", "reason:deadlineexceeded"},
},
expectedServiceCheck: &serviceCheck{
name: "kubernetes_state.job.complete",
status: servicecheck.ServiceCheckCritical,
tags: []string{"kube_cronjob:foo", "namespace:default"},
tags: []string{"kube_cronjob:foo", "namespace:default", "reason:deadlineexceeded"},
},
expectedMetric: &metricsExpected{
name: "kubernetes_state.job.completion.failed",
val: 1,
tags: []string{"kube_cronjob:foo", "namespace:default"},
tags: []string{"kube_cronjob:foo", "namespace:default", "reason:deadlineexceeded"},
},
},
{
Expand All @@ -393,7 +393,7 @@ func Test_jobFailedTransformer(t *testing.T) {
"condition": "true",
},
},
tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true"},
tags: []string{"job_name:foo-1509998340", "namespace:default", "condition:true", "reason:backofflimitexceeded"},
},
expectedServiceCheck: nil,
expectedMetric: nil,
Expand Down Expand Up @@ -1009,6 +1009,48 @@ func Test_containerTerminatedReasonTransformer(t *testing.T) {
tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:Error"},
},
},
{
name: "BackoffLimitExceeded",
args: args{
name: "kube_pod_container_status_terminated_reason",
metric: ksmstore.DDMetric{
Val: 1,
Labels: map[string]string{
"container": "foo",
"pod": "bar",
"namespace": "default",
"reason": "BackoffLimitExceeded",
},
},
tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:BackoffLimitExceeded"},
},
expected: &metricsExpected{
name: "kubernetes_state.container.status_report.count.terminated",
val: 1,
tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:BackoffLimitExceeded"},
},
},
{
name: "DeadlineExceeded",
args: args{
name: "kube_pod_container_status_terminated_reason",
metric: ksmstore.DDMetric{
Val: 1,
Labels: map[string]string{
"container": "foo",
"pod": "bar",
"namespace": "default",
"reason": "DeadlineExceeded",
},
},
tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:DeadlineExceeded"},
},
expected: &metricsExpected{
name: "kubernetes_state.container.status_report.count.terminated",
val: 1,
tags: []string{"container:foo", "pod:bar", "namespace:default", "reason:DeadlineExceeded"},
},
},
}
for _, tt := range tests {
s := mocksender.NewMockSender("ksm")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ var includeContainerStateReason = map[string][]string{
"invalidimagename",
"createcontainerconfigerror",
},
"terminated": {"oomkilled", "containercannotrun", "error"},
"terminated": {
"oomkilled",
"containercannotrun",
"error",
"deadlineexceeded",
"backofflimitexceeded",
},
}

const kubeNamespaceTag = "kube_namespace"
Expand Down

0 comments on commit 1be85ef

Please sign in to comment.