Skip to content

Commit

Permalink
Merge pull request #1 from sumitk23/cronjob-node-metrics
Browse files Browse the repository at this point in the history
Added missing cronjob and node attributes: https://github.com/kubern…
  • Loading branch information
sumitk23 committed Jul 7, 2021
2 parents 7594278 + f46fb4e commit 8780cfb
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 19 deletions.
2 changes: 2 additions & 0 deletions docs/cronjob-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@
| kube_cronjob_spec_suspend | Gauge | `cronjob`=&lt;cronjob-name&gt; <br> `namespace`=&lt;cronjob-namespace&gt; | STABLE
| kube_cronjob_spec_starting_deadline_seconds | Gauge | `cronjob`=&lt;cronjob-name&gt; <br> `namespace`=&lt;cronjob-namespace&gt; | STABLE
| kube_cronjob_metadata_resource_version| Gauge | `cronjob`=&lt;cronjob-name&gt; <br> `namespace`=&lt;cronjob-namespace&gt; | STABLE
| kube_cronjob_spec_successful_job_history_limit | Gauge | `cronjob`=&lt;cronjob-name&gt; <br> `namespace`=&lt;cronjob-namespace&gt; | STABLE
| kube_cronjob_spec_failed_job_history_limit | Gauge | `cronjob`=&lt;cronjob-name&gt; <br> `namespace`=&lt;cronjob-namespace&gt; | STABLE
2 changes: 1 addition & 1 deletion docs/node-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

| Metric name| Metric type | Description | Unit (where applicable) | Labels/tags | Status |
| ---------- | ----------- | ----------- | ----------------------- | ----------- | ------ |
| kube_node_info | Gauge | Information about a cluster node| |`node`=&lt;node-address&gt; <br> `kernel_version`=&lt;kernel-version&gt; <br> `os_image`=&lt;os-image-name&gt; <br> `container_runtime_version`=&lt;container-runtime-and-version-combination&gt; <br> `kubelet_version`=&lt;kubelet-version&gt; <br> `kubeproxy_version`=&lt;kubeproxy-version&gt; <br> `pod_cidr`=&lt;pod-cidr&gt; <br> `provider_id`=&lt;provider-id&gt; <br> `internal_ip`=&lt;internal-ip&gt; | STABLE |
| kube_node_info | Gauge | Information about a cluster node| |`node`=&lt;node-address&gt; <br> `kernel_version`=&lt;kernel-version&gt; <br> `os_image`=&lt;os-image-name&gt; <br> `container_runtime_version`=&lt;container-runtime-and-version-combination&gt; <br> `kubelet_version`=&lt;kubelet-version&gt; <br> `kubeproxy_version`=&lt;kubeproxy-version&gt; <br> `pod_cidr`=&lt;pod-cidr&gt; <br> `provider_id`=&lt;provider-id&gt; <br> `system_uuid`=&lt;system-uuid&gt; <br> `internal_ip`=&lt;internal-ip&gt; | STABLE |
| kube_node_labels | Gauge | Kubernetes labels converted to Prometheus labels | | `node`=&lt;node-address&gt; <br> `label_NODE_LABEL`=&lt;NODE_LABEL&gt; | STABLE |
| kube_node_role | Gauge | The role of a cluster node | | `node`=&lt;node-address&gt; <br> `role`=&lt;NODE_ROLE&gt; | EXPERIMENTAL |
| kube_node_spec_unschedulable | Gauge | Whether a node can schedule new pods | | `node`=&lt;node-address&gt;| STABLE |
Expand Down
42 changes: 42 additions & 0 deletions internal/store/cronjob.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,48 @@ func cronJobMetricFamilies(allowLabelsList []string) []generator.FamilyGenerator
}
}),
),
// kube_cronjob_spec_successful_job_history_limit reports the value of
// Spec.SuccessfulJobsHistoryLimit for a CronJob as a gauge.
*generator.NewFamilyGenerator(
"kube_cronjob_spec_successful_job_history_limit",
"Successful job history limit tells the controller how many completed jobs should be preserved.",
metric.Gauge,
"",
wrapCronJobFunc(func(j *batchv1beta1.CronJob) *metric.Family {
ms := []*metric.Metric{}

// The limit is an optional pointer field: when it is nil no sample is
// appended and the family is returned with an empty metric list.
if j.Spec.SuccessfulJobsHistoryLimit != nil {
ms = append(ms, &metric.Metric{
// No metric-specific labels are set here.
// NOTE(review): the cronjob/namespace labels seen in the expected test
// output are presumably attached by wrapCronJobFunc — confirm.
LabelKeys: []string{},
LabelValues: []string{},
// Dereference the *int32 limit and widen it to the float64 sample value.
Value: float64(*j.Spec.SuccessfulJobsHistoryLimit),
})
}

return &metric.Family{
Metrics: ms,
}
}),
),
// kube_cronjob_spec_failed_job_history_limit reports the value of
// Spec.FailedJobsHistoryLimit for a CronJob as a gauge.
*generator.NewFamilyGenerator(
"kube_cronjob_spec_failed_job_history_limit",
"Failed job history limit tells the controller how many failed jobs should be preserved.",
metric.Gauge,
"",
wrapCronJobFunc(func(j *batchv1beta1.CronJob) *metric.Family {
ms := []*metric.Metric{}

// The limit is an optional pointer field: when it is nil no sample is
// appended and the family is returned with an empty metric list.
if j.Spec.FailedJobsHistoryLimit != nil {
ms = append(ms, &metric.Metric{
// No metric-specific labels are set here.
// NOTE(review): the cronjob/namespace labels seen in the expected test
// output are presumably attached by wrapCronJobFunc — confirm.
LabelKeys: []string{},
LabelValues: []string{},
// Dereference the *int32 limit and widen it to the float64 sample value.
Value: float64(*j.Spec.FailedJobsHistoryLimit),
})
}

return &metric.Family{
Metrics: ms,
}
}),
),
}
}

Expand Down
56 changes: 41 additions & 15 deletions internal/store/cronjob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ var (
SuspendTrue = true
SuspendFalse = false
StartingDeadlineSeconds300 int64 = 300
SuccessfulJobHistoryLimit3 int32 = 3
FailedJobHistoryLimit1 int32 = 1

// "1520742896" is "2018/3/11 12:34:56" in "Asia/Shanghai".
ActiveRunningCronJob1LastScheduleTime = time.Unix(1520742896, 0)
Expand Down Expand Up @@ -116,18 +118,22 @@ func TestCronJobStore(t *testing.T) {
LastScheduleTime: &metav1.Time{Time: ActiveRunningCronJob1LastScheduleTime},
},
Spec: batchv1beta1.CronJobSpec{
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendFalse,
Schedule: "0 */6 * * *",
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendFalse,
Schedule: "0 */6 * * *",
SuccessfulJobsHistoryLimit: &SuccessfulJobHistoryLimit3,
FailedJobsHistoryLimit: &FailedJobHistoryLimit1,
},
},
Want: `
# HELP kube_cronjob_created Unix creation timestamp
# HELP kube_cronjob_info Info about cronjob.
# HELP kube_cronjob_labels Kubernetes labels converted to Prometheus labels.
# HELP kube_cronjob_next_schedule_time Next time the cronjob should be scheduled. The time after lastScheduleTime, or after the cron job's creation time if it's never been scheduled. Use this to determine if the job is delayed.
# HELP kube_cronjob_spec_failed_job_history_limit Failed job history limit tells the controller how many failed jobs should be preserved.
# HELP kube_cronjob_spec_starting_deadline_seconds Deadline in seconds for starting the job if it misses scheduled time for any reason.
# HELP kube_cronjob_spec_successful_job_history_limit Successful job history limit tells the controller how many completed jobs should be preserved.
# HELP kube_cronjob_spec_suspend Suspend flag tells the controller to suspend subsequent executions.
# HELP kube_cronjob_status_active Active holds pointers to currently running jobs.
# HELP kube_cronjob_metadata_resource_version Resource version representing a specific version of the cronjob.
Expand All @@ -136,21 +142,25 @@ func TestCronJobStore(t *testing.T) {
# TYPE kube_cronjob_info gauge
# TYPE kube_cronjob_labels gauge
# TYPE kube_cronjob_next_schedule_time gauge
# TYPE kube_cronjob_spec_failed_job_history_limit gauge
# TYPE kube_cronjob_spec_starting_deadline_seconds gauge
# TYPE kube_cronjob_spec_successful_job_history_limit gauge
# TYPE kube_cronjob_spec_suspend gauge
# TYPE kube_cronjob_status_active gauge
# TYPE kube_cronjob_metadata_resource_version gauge
# TYPE kube_cronjob_status_last_schedule_time gauge
kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveRunningCronJob1",namespace="ns1",schedule="0 */6 * * *"} 1
kube_cronjob_labels{cronjob="ActiveRunningCronJob1",namespace="ns1"} 1
kube_cronjob_spec_failed_job_history_limit{cronjob="ActiveRunningCronJob1",namespace="ns1"} 1
kube_cronjob_spec_starting_deadline_seconds{cronjob="ActiveRunningCronJob1",namespace="ns1"} 300
kube_cronjob_spec_successful_job_history_limit{cronjob="ActiveRunningCronJob1",namespace="ns1"} 3
kube_cronjob_spec_suspend{cronjob="ActiveRunningCronJob1",namespace="ns1"} 0
kube_cronjob_status_active{cronjob="ActiveRunningCronJob1",namespace="ns1"} 2
kube_cronjob_metadata_resource_version{cronjob="ActiveRunningCronJob1",namespace="ns1"} 11111
kube_cronjob_status_last_schedule_time{cronjob="ActiveRunningCronJob1",namespace="ns1"} 1.520742896e+09
` + fmt.Sprintf("kube_cronjob_next_schedule_time{cronjob=\"ActiveRunningCronJob1\",namespace=\"ns1\"} %ve+09\n",
float64(ActiveRunningCronJob1NextScheduleTime.Unix())/math.Pow10(9)),
MetricNames: []string{"kube_cronjob_next_schedule_time", "kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels", "kube_cronjob_status_last_schedule_time"},
MetricNames: []string{"kube_cronjob_next_schedule_time", "kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels", "kube_cronjob_status_last_schedule_time", "kube_cronjob_spec_successful_job_history_limit", "kube_cronjob_spec_failed_job_history_limit"},
},
{
Obj: &batchv1beta1.CronJob{
Expand All @@ -168,38 +178,46 @@ func TestCronJobStore(t *testing.T) {
LastScheduleTime: &metav1.Time{Time: SuspendedCronJob1LastScheduleTime},
},
Spec: batchv1beta1.CronJobSpec{
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendTrue,
Schedule: "0 */3 * * *",
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendTrue,
Schedule: "0 */3 * * *",
SuccessfulJobsHistoryLimit: &SuccessfulJobHistoryLimit3,
FailedJobsHistoryLimit: &FailedJobHistoryLimit1,
},
},
Want: `
# HELP kube_cronjob_created Unix creation timestamp
# HELP kube_cronjob_info Info about cronjob.
# HELP kube_cronjob_labels Kubernetes labels converted to Prometheus labels.
# HELP kube_cronjob_spec_failed_job_history_limit Failed job history limit tells the controller how many failed jobs should be preserved.
# HELP kube_cronjob_spec_starting_deadline_seconds Deadline in seconds for starting the job if it misses scheduled time for any reason.
# HELP kube_cronjob_spec_successful_job_history_limit Successful job history limit tells the controller how many completed jobs should be preserved.
# HELP kube_cronjob_spec_suspend Suspend flag tells the controller to suspend subsequent executions.
# HELP kube_cronjob_status_active Active holds pointers to currently running jobs.
# HELP kube_cronjob_metadata_resource_version Resource version representing a specific version of the cronjob.
# HELP kube_cronjob_status_last_schedule_time LastScheduleTime keeps information of when was the last time the job was successfully scheduled.
# TYPE kube_cronjob_created gauge
# TYPE kube_cronjob_info gauge
# TYPE kube_cronjob_labels gauge
# TYPE kube_cronjob_spec_failed_job_history_limit gauge
# TYPE kube_cronjob_spec_starting_deadline_seconds gauge
# TYPE kube_cronjob_spec_successful_job_history_limit gauge
# TYPE kube_cronjob_spec_suspend gauge
# TYPE kube_cronjob_status_active gauge
# TYPE kube_cronjob_metadata_resource_version gauge
# TYPE kube_cronjob_status_last_schedule_time gauge
kube_cronjob_info{concurrency_policy="Forbid",cronjob="SuspendedCronJob1",namespace="ns1",schedule="0 */3 * * *"} 1
kube_cronjob_labels{cronjob="SuspendedCronJob1",namespace="ns1"} 1
kube_cronjob_spec_failed_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 1
kube_cronjob_spec_starting_deadline_seconds{cronjob="SuspendedCronJob1",namespace="ns1"} 300
kube_cronjob_spec_successful_job_history_limit{cronjob="SuspendedCronJob1",namespace="ns1"} 3
kube_cronjob_spec_suspend{cronjob="SuspendedCronJob1",namespace="ns1"} 1
kube_cronjob_status_active{cronjob="SuspendedCronJob1",namespace="ns1"} 0
kube_cronjob_metadata_resource_version{cronjob="SuspendedCronJob1",namespace="ns1"} 22222
kube_cronjob_status_last_schedule_time{cronjob="SuspendedCronJob1",namespace="ns1"} 1.520762696e+09
`,
MetricNames: []string{"kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels", "kube_cronjob_status_last_schedule_time"},
MetricNames: []string{"kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels", "kube_cronjob_status_last_schedule_time", "kube_cronjob_spec_successful_job_history_limit", "kube_cronjob_spec_failed_job_history_limit"},
},
{
Obj: &batchv1beta1.CronJob{
Expand All @@ -218,40 +236,48 @@ func TestCronJobStore(t *testing.T) {
LastScheduleTime: nil,
},
Spec: batchv1beta1.CronJobSpec{
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendFalse,
Schedule: "25 * * * *",
StartingDeadlineSeconds: &StartingDeadlineSeconds300,
ConcurrencyPolicy: "Forbid",
Suspend: &SuspendFalse,
Schedule: "25 * * * *",
SuccessfulJobsHistoryLimit: &SuccessfulJobHistoryLimit3,
FailedJobsHistoryLimit: &FailedJobHistoryLimit1,
},
},
Want: `
# HELP kube_cronjob_created Unix creation timestamp
# HELP kube_cronjob_info Info about cronjob.
# HELP kube_cronjob_labels Kubernetes labels converted to Prometheus labels.
# HELP kube_cronjob_next_schedule_time Next time the cronjob should be scheduled. The time after lastScheduleTime, or after the cron job's creation time if it's never been scheduled. Use this to determine if the job is delayed.
# HELP kube_cronjob_spec_failed_job_history_limit Failed job history limit tells the controller how many failed jobs should be preserved.
# HELP kube_cronjob_spec_starting_deadline_seconds Deadline in seconds for starting the job if it misses scheduled time for any reason.
# HELP kube_cronjob_spec_successful_job_history_limit Successful job history limit tells the controller how many completed jobs should be preserved.
# HELP kube_cronjob_spec_suspend Suspend flag tells the controller to suspend subsequent executions.
# HELP kube_cronjob_status_active Active holds pointers to currently running jobs.
# HELP kube_cronjob_metadata_resource_version Resource version representing a specific version of the cronjob.
# TYPE kube_cronjob_created gauge
# TYPE kube_cronjob_info gauge
# TYPE kube_cronjob_labels gauge
# TYPE kube_cronjob_next_schedule_time gauge
# TYPE kube_cronjob_spec_failed_job_history_limit gauge
# TYPE kube_cronjob_spec_starting_deadline_seconds gauge
# TYPE kube_cronjob_spec_successful_job_history_limit gauge
# TYPE kube_cronjob_spec_suspend gauge
# TYPE kube_cronjob_status_active gauge
# TYPE kube_cronjob_metadata_resource_version gauge
kube_cronjob_spec_starting_deadline_seconds{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 300
kube_cronjob_status_active{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 0
kube_cronjob_metadata_resource_version{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 33333
kube_cronjob_spec_failed_job_history_limit{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 1
kube_cronjob_spec_successful_job_history_limit{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 3
kube_cronjob_spec_suspend{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 0
kube_cronjob_info{concurrency_policy="Forbid",cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1",schedule="25 * * * *"} 1
kube_cronjob_created{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 1.520766296e+09
kube_cronjob_labels{cronjob="ActiveCronJob1NoLastScheduled",namespace="ns1"} 1
` +
fmt.Sprintf("kube_cronjob_next_schedule_time{cronjob=\"ActiveCronJob1NoLastScheduled\",namespace=\"ns1\"} %ve+09\n",
float64(ActiveCronJob1NoLastScheduledNextScheduleTime.Unix())/math.Pow10(9)),
MetricNames: []string{"kube_cronjob_next_schedule_time", "kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels"},
MetricNames: []string{"kube_cronjob_next_schedule_time", "kube_cronjob_spec_starting_deadline_seconds", "kube_cronjob_status_active", "kube_cronjob_metadata_resource_version", "kube_cronjob_spec_suspend", "kube_cronjob_info", "kube_cronjob_created", "kube_cronjob_labels", "kube_cronjob_spec_successful_job_history_limit", "kube_cronjob_spec_failed_job_history_limit"},
},
}
for i, c := range cases {
Expand Down
2 changes: 2 additions & 0 deletions internal/store/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ func createNodeInfoFamilyGenerator() generator.FamilyGenerator {
"kubeproxy_version",
"provider_id",
"pod_cidr",
"system_uuid",
}
labelValues := []string{
n.Status.NodeInfo.KernelVersion,
Expand All @@ -99,6 +100,7 @@ func createNodeInfoFamilyGenerator() generator.FamilyGenerator {
n.Status.NodeInfo.KubeProxyVersion,
n.Spec.ProviderID,
n.Spec.PodCIDR,
n.Status.NodeInfo.SystemUUID,
}

internalIP := ""
Expand Down
8 changes: 5 additions & 3 deletions internal/store/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func TestNodeStore(t *testing.T) {
KubeProxyVersion: "kubeproxy",
OSImage: "osimage",
ContainerRuntimeVersion: "rkt",
SystemUUID: "6a934e21-5207-4a84-baea-3a952d926c80",
},
Addresses: []v1.NodeAddress{
{Type: "InternalIP", Address: "1.2.3.4"},
Expand All @@ -59,7 +60,7 @@ func TestNodeStore(t *testing.T) {
# TYPE kube_node_info gauge
# TYPE kube_node_labels gauge
# TYPE kube_node_spec_unschedulable gauge
kube_node_info{container_runtime_version="rkt",kernel_version="kernel",kubelet_version="kubelet",kubeproxy_version="kubeproxy",node="127.0.0.1",os_image="osimage",pod_cidr="172.24.10.0/24",provider_id="provider://i-uniqueid",internal_ip="1.2.3.4"} 1
kube_node_info{container_runtime_version="rkt",kernel_version="kernel",kubelet_version="kubelet",kubeproxy_version="kubeproxy",node="127.0.0.1",os_image="osimage",pod_cidr="172.24.10.0/24",provider_id="provider://i-uniqueid",internal_ip="1.2.3.4",system_uuid="6a934e21-5207-4a84-baea-3a952d926c80"} 1
kube_node_labels{node="127.0.0.1"} 1
kube_node_spec_unschedulable{node="127.0.0.1"} 0
`,
Expand All @@ -75,7 +76,7 @@ func TestNodeStore(t *testing.T) {
Want: `
# HELP kube_node_info Information about a cluster node.
# TYPE kube_node_info gauge
kube_node_info{container_runtime_version="",kernel_version="",kubelet_version="",kubeproxy_version="",node="",os_image="",pod_cidr="",provider_id="",internal_ip=""} 1
kube_node_info{container_runtime_version="",kernel_version="",kubelet_version="",kubeproxy_version="",node="",os_image="",pod_cidr="",provider_id="",internal_ip="",system_uuid=""} 1
`,
MetricNames: []string{"kube_node_info"},
},
Expand All @@ -101,6 +102,7 @@ func TestNodeStore(t *testing.T) {
KubeProxyVersion: "kubeproxy",
OSImage: "osimage",
ContainerRuntimeVersion: "rkt",
SystemUUID: "6a934e21-5207-4a84-baea-3a952d926c80",
},
Addresses: []v1.NodeAddress{
{Type: "InternalIP", Address: "1.2.3.4"},
Expand Down Expand Up @@ -139,7 +141,7 @@ func TestNodeStore(t *testing.T) {
# TYPE kube_node_status_allocatable gauge
# TYPE kube_node_status_capacity gauge
kube_node_created{node="127.0.0.1"} 1.5e+09
kube_node_info{container_runtime_version="rkt",kernel_version="kernel",kubelet_version="kubelet",kubeproxy_version="kubeproxy",node="127.0.0.1",os_image="osimage",pod_cidr="172.24.10.0/24",provider_id="provider://i-randomidentifier",internal_ip="1.2.3.4"} 1
kube_node_info{container_runtime_version="rkt",kernel_version="kernel",kubelet_version="kubelet",kubeproxy_version="kubeproxy",node="127.0.0.1",os_image="osimage",pod_cidr="172.24.10.0/24",provider_id="provider://i-randomidentifier",internal_ip="1.2.3.4",system_uuid="6a934e21-5207-4a84-baea-3a952d926c80"} 1
kube_node_labels{node="127.0.0.1"} 1
kube_node_role{node="127.0.0.1",role="master"} 1
kube_node_spec_unschedulable{node="127.0.0.1"} 1
Expand Down

0 comments on commit 8780cfb

Please sign in to comment.