Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some misc fixes #1

Merged
merged 7 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ on:
types: [created]

permissions:
contents: write
packages: write
contents: write
packages: write

jobs:
releases-matrix:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name: Go package
name: run-tests

on: [push]

jobs:
build:
name: run-tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Or a combination of both:
kube-doctor --label-selector app.kubernetes.io/name=prometheus --namespace monitoring
```

Non-namespaced resources are checked separately and can be enalbed with the `--non-namespaced-resources` flag:
Non-namespaced resources like nodes can be checked with the `--non-namespaced-resources` flag:

```shell
kube-doctor --non-namespaced-resources
Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func checkContainer(container v1.Container) (results symptoms.ContainerSymptomLi
results.Add(symptoms.ContainerSymptom{
Name: container.Name,
Message: "memory request and limit are not equal",
Severity: "critical",
Severity: "warning",
})
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,5 @@ func TestContainerMemoryRequestLimitNotEqual(t *testing.T) {

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "memory request and limit are not equal", result.Symptoms[0].Message)
assert.Equal(t, "critical", result.Symptoms[0].Severity)
assert.Equal(t, "warning", result.Symptoms[0].Severity)
}
2 changes: 1 addition & 1 deletion pkg/checkup/daemonsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func CheckDaemonSets(resources *v1.DaemonSetList) (results symptoms.SymptomList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func CheckDeployments(resources *appsv1.DeploymentList) (results symptoms.Sympto
})
}
if condition.Reason == "ReplicaSetUpdated" && condition.Type == "Progressing" {
if time.Now().Sub(condition.LastUpdateTime.Time).Minutes() > 10 {
if time.Since(condition.LastUpdateTime.Time).Minutes() > 10 {
results.Add(symptoms.Symptom{
Message: "ReplicaSet update in progress but no progress for 10 minutes or longer",
Severity: "critical",
Expand Down Expand Up @@ -72,7 +72,7 @@ func CheckDeployments(resources *appsv1.DeploymentList) (results symptoms.Sympto
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func CheckEndpoints(resources *v1.EndpointsList) (results symptoms.SymptomList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
10 changes: 6 additions & 4 deletions pkg/checkup/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {
if event.Source.Component == "cluster-autoscaler" {
if event.Reason == "ScaleDown" || event.Reason == "TriggeredScaleUp" || event.Type != "Normal" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -33,7 +33,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "service-controller" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -43,7 +43,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "default-scheduler" && event.Reason != "FailedScheduling" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -53,7 +53,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "kubelet" && event.Reason != "Unhealthy" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -62,5 +62,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/horizontalpodautoscalers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func CheckHpas(resources *autoscaling.HorizontalPodAutoscalerList) (results symp
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func CheckJobs(resources *batchv1.JobList) (results symptoms.SymptomList) {
log.Debug(fmt.Sprintf("Examining Job %s/%s", job.Namespace, job.Name))

// Ignore jobs older than 1 hour
if job.Status.CompletionTime != nil && time.Now().Sub(job.Status.CompletionTime.Time).Minutes() > 60 {
if job.Status.CompletionTime != nil && time.Since(job.Status.CompletionTime.Time).Minutes() > 60 {
continue
}

Expand All @@ -39,7 +39,7 @@ func CheckJobs(resources *batchv1.JobList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/kubeapihealth.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func KubeApiHealthStatuses(resources *statuses.KubeApiHealthEndpointStatusList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func CheckNodes(resources *v1.NodeList) (results symptoms.SymptomList) {
for _, condition := range node.Status.Conditions {
if condition.Type == "Ready" {
if condition.Status != "True" {
if time.Now().Sub(node.ObjectMeta.CreationTimestamp.Time).Minutes() > 5 {
if time.Since(node.ObjectMeta.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and not Ready",
Severity: "critical",
Expand Down Expand Up @@ -59,7 +59,7 @@ func CheckNodes(resources *v1.NodeList) (results symptoms.SymptomList) {
})
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/persistentvolumeclaims.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func CheckPersistentVolumeClaims(resources *v1.PersistentVolumeClaimList) (resul
for _, pvc := range resources.Items {
log.Debug(fmt.Sprintf("Examining PersistentVolumeClaim %s/%s", pvc.Name, pvc.Namespace))

if pvc.Status.Phase != "Bound" && time.Now().Sub(pvc.CreationTimestamp.Time).Minutes() > 5 {
if pvc.Status.Phase != "Bound" && time.Since(pvc.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and status is not bound",
Severity: "critical",
Expand All @@ -27,7 +27,7 @@ func CheckPersistentVolumeClaims(resources *v1.PersistentVolumeClaimList) (resul
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/persistentvolumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func CheckPersistentVolumes(resources *v1.PersistentVolumeList) (results symptom
for _, volume := range resources.Items {
log.Debug(fmt.Sprintf("Examining PersistentVolume %s", volume.Name))

if volume.Status.Phase != "Bound" && time.Now().Sub(volume.CreationTimestamp.Time).Minutes() > 5 {
if volume.Status.Phase != "Bound" && time.Since(volume.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and status is not bound",
Severity: "critical",
Expand All @@ -27,7 +27,7 @@ func CheckPersistentVolumes(resources *v1.PersistentVolumeList) (results symptom
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
12 changes: 6 additions & 6 deletions pkg/checkup/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
log.Debug(fmt.Sprintf("Examining Pod %s/%s", pod.Namespace, pod.Name))

if pod.Status.Phase == "Succeeded" {
return
continue
}

if pod.Status.Phase != "Running" {
Expand All @@ -45,9 +45,9 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {

for _, scs := range pod.Status.ContainerStatuses {
if !scs.Ready {
if time.Now().Sub(pod.Status.StartTime.Time).Minutes() < 3 {
if time.Since(pod.Status.StartTime.Time).Minutes() < 3 {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' is not ready but pod started %.1f mins ago", scs.Name, pod.Status.StartTime.Sub(time.Now()).Minutes()),
Message: fmt.Sprintf("container '%s' is not ready but pod started %.1f mins ago", scs.Name, time.Since(pod.Status.StartTime.Time).Minutes()),
Severity: "warning",
ResourceName: pod.Name,
ResourceType: resourceType,
Expand All @@ -65,7 +65,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
}

if scs.RestartCount != 0 {
if time.Now().Sub(scs.LastTerminationState.Terminated.FinishedAt.Time).Hours() > 1 {
if time.Since(scs.LastTerminationState.Terminated.FinishedAt.Time).Hours() > 1 {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' has been restarted %d times", scs.Name, scs.RestartCount),
Severity: "warning",
Expand All @@ -77,7 +77,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' was restarted %.1f mins ago: %d (exit code) %s (reason)",
scs.Name,
scs.LastTerminationState.Terminated.FinishedAt.Sub(time.Now()).Minutes(),
time.Since(scs.LastTerminationState.Terminated.FinishedAt.Time).Minutes(),
scs.LastTerminationState.Terminated.ExitCode,
scs.LastTerminationState.Terminated.Reason,
),
Expand All @@ -101,7 +101,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestCheckPodsBadContainerStatuses(t *testing.T) {
result := CheckPods(&dummyResources)

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "container 'c1' is not ready but pod started -0.0 mins ago", result.Symptoms[0].Message)
assert.Equal(t, "container 'c1' is not ready but pod started 0.0 mins ago", result.Symptoms[0].Message)
assert.Equal(t, "warning", result.Symptoms[0].Severity)
}

Expand Down Expand Up @@ -220,7 +220,7 @@ func TestCheckPodsWithRestarts(t *testing.T) {
result := CheckPods(&dummyResources)

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "container 'c1' was restarted -0.0 mins ago: 1 (exit code) Crashed (reason)", result.Symptoms[0].Message)
assert.Equal(t, "container 'c1' was restarted 0.0 mins ago: 1 (exit code) Crashed (reason)", result.Symptoms[0].Message)
assert.Equal(t, "critical", result.Symptoms[0].Severity)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func CheckServices(resources *v1.ServiceList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
12 changes: 12 additions & 0 deletions pkg/checkup/symptoms/symptoms.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ func (l *SymptomList) Add(s Symptom) {
l.Symptoms = append(l.Symptoms, s)
}

// CountSymptomsSeverity tallies the symptoms held by the list by severity.
//
// The result is a two-element array: index 0 is the number of symptoms whose
// Severity equals "critical", index 1 is the number of all remaining symptoms
// (any severity other than "critical" is counted there).
func (l *SymptomList) CountSymptomsSeverity() (c [2]int) {
	const (
		criticalIdx = 0 // slot for symptoms with Severity "critical"
		otherIdx    = 1 // slot for every other severity
	)

	for i := range l.Symptoms {
		switch l.Symptoms[i].Severity {
		case "critical":
			c[criticalIdx]++
		default:
			c[otherIdx]++
		}
	}

	return c
}

type ContainerSymptom struct {
Name string
Severity string `validate:"oneof=warning critical"`
Expand Down
7 changes: 2 additions & 5 deletions pkg/doctor/checkup.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,18 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func DoCheckUp(cCtx *cli.Context) error {
func DoCheckUp(cCtx *cli.Context) {
log.Setup(cCtx.Bool("debug"), cCtx.Bool("warning-symptoms"))
log.Debug(fmt.Sprintf("Connected to cluster from context %s running version %s", kubernetes.ContextName, kubernetes.ServerVersion))

checkNonNamespaced := cCtx.Bool("non-namespaced")
checkNonNamespaced := cCtx.Bool("non-namespaced-resources")
namespace := cCtx.String("namespace")
labelSelector := cCtx.String("label-selector")

if checkNonNamespaced {
log.LogSymptoms(checkup.CheckNodes(kubernetes.GetNodes()))
log.LogSymptoms(checkup.CheckPersistentVolumes(kubernetes.GetPersistentVolumes()))
log.LogSymptoms(checkup.KubeApiHealthStatuses(kubernetes.GetKubeApiHealth()))
return nil
}

log.LogSymptoms(checkup.CheckDaemonSets(kubernetes.GetDaemonSets(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
Expand All @@ -34,6 +33,4 @@ func DoCheckUp(cCtx *cli.Context) error {
log.LogSymptoms(checkup.CheckPersistentVolumeClaims(kubernetes.GetPersistentVolumeClaims(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
log.LogSymptoms(checkup.CheckPods(kubernetes.GetPods(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
log.LogSymptoms(checkup.CheckServices(kubernetes.GetServices(namespace, metav1.ListOptions{LabelSelector: labelSelector})))

return nil
}
Loading