Skip to content

Commit

Permalink
Some misc fixes (#1)
Browse files Browse the repository at this point in the history
* Some misc fixes

* fix this test

* fix these 2 tests also

* Use time.Since, much nicer

* some more golangci-lint suggestions

* fixes to logging and more

---------

Co-authored-by: Max Williams <[email protected]>
  • Loading branch information
max-rocket-internet and max-rocket-internet authored Aug 23, 2023
1 parent 6b950f7 commit 95e23b5
Show file tree
Hide file tree
Showing 21 changed files with 72 additions and 45 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ on:
types: [created]

permissions:
contents: write
packages: write
contents: write
packages: write

jobs:
releases-matrix:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name: Go package
name: run-tests

on: [push]

jobs:
build:
name: run-tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Or a combination of both:
kube-doctor --label-selector app.kubernetes.io/name=prometheus --namespace monitoring
```

Non-namespaced resources are checked separately and can be enalbed with the `--non-namespaced-resources` flag:
Non-namespaced resources like nodes can be checked with the `--non-namespaced-resources` flag:

```shell
kube-doctor --non-namespaced-resources
Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func checkContainer(container v1.Container) (results symptoms.ContainerSymptomLi
results.Add(symptoms.ContainerSymptom{
Name: container.Name,
Message: "memory request and limit are not equal",
Severity: "critical",
Severity: "warning",
})
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,5 @@ func TestContainerMemoryRequestLimitNotEqual(t *testing.T) {

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "memory request and limit are not equal", result.Symptoms[0].Message)
assert.Equal(t, "critical", result.Symptoms[0].Severity)
assert.Equal(t, "warning", result.Symptoms[0].Severity)
}
2 changes: 1 addition & 1 deletion pkg/checkup/daemonsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func CheckDaemonSets(resources *v1.DaemonSetList) (results symptoms.SymptomList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func CheckDeployments(resources *appsv1.DeploymentList) (results symptoms.Sympto
})
}
if condition.Reason == "ReplicaSetUpdated" && condition.Type == "Progressing" {
if time.Now().Sub(condition.LastUpdateTime.Time).Minutes() > 10 {
if time.Since(condition.LastUpdateTime.Time).Minutes() > 10 {
results.Add(symptoms.Symptom{
Message: "ReplicaSet update in progress but no progress for 10 minutes or longer",
Severity: "critical",
Expand Down Expand Up @@ -72,7 +72,7 @@ func CheckDeployments(resources *appsv1.DeploymentList) (results symptoms.Sympto
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func CheckEndpoints(resources *v1.EndpointsList) (results symptoms.SymptomList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
10 changes: 6 additions & 4 deletions pkg/checkup/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {
if event.Source.Component == "cluster-autoscaler" {
if event.Reason == "ScaleDown" || event.Reason == "TriggeredScaleUp" || event.Type != "Normal" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -33,7 +33,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "service-controller" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -43,7 +43,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "default-scheduler" && event.Reason != "FailedScheduling" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -53,7 +53,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {

if event.Type != "Normal" && event.Source.Component == "kubelet" && event.Reason != "Unhealthy" {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Now().Sub(event.LastTimestamp.Time).Minutes(), event.Message),
Message: fmt.Sprintf("(%s) %.1f minutes ago: %s", event.InvolvedObject.Kind, time.Since(event.LastTimestamp.Time).Minutes(), event.Message),
Severity: "critical",
ResourceName: event.InvolvedObject.Name,
ResourceType: resourceType,
Expand All @@ -62,5 +62,7 @@ func CheckEvents(resources *v1.EventList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/horizontalpodautoscalers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func CheckHpas(resources *autoscaling.HorizontalPodAutoscalerList) (results symp
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func CheckJobs(resources *batchv1.JobList) (results symptoms.SymptomList) {
log.Debug(fmt.Sprintf("Examining Job %s/%s", job.Namespace, job.Name))

// Ignore jobs older than 1 hour
if job.Status.CompletionTime != nil && time.Now().Sub(job.Status.CompletionTime.Time).Minutes() > 60 {
if job.Status.CompletionTime != nil && time.Since(job.Status.CompletionTime.Time).Minutes() > 60 {
continue
}

Expand All @@ -39,7 +39,7 @@ func CheckJobs(resources *batchv1.JobList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
2 changes: 1 addition & 1 deletion pkg/checkup/kubeapihealth.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func KubeApiHealthStatuses(resources *statuses.KubeApiHealthEndpointStatusList)
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func CheckNodes(resources *v1.NodeList) (results symptoms.SymptomList) {
for _, condition := range node.Status.Conditions {
if condition.Type == "Ready" {
if condition.Status != "True" {
if time.Now().Sub(node.ObjectMeta.CreationTimestamp.Time).Minutes() > 5 {
if time.Since(node.ObjectMeta.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and not Ready",
Severity: "critical",
Expand Down Expand Up @@ -59,7 +59,7 @@ func CheckNodes(resources *v1.NodeList) (results symptoms.SymptomList) {
})
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/persistentvolumeclaims.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func CheckPersistentVolumeClaims(resources *v1.PersistentVolumeClaimList) (resul
for _, pvc := range resources.Items {
log.Debug(fmt.Sprintf("Examining PersistentVolumeClaim %s/%s", pvc.Name, pvc.Namespace))

if pvc.Status.Phase != "Bound" && time.Now().Sub(pvc.CreationTimestamp.Time).Minutes() > 5 {
if pvc.Status.Phase != "Bound" && time.Since(pvc.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and status is not bound",
Severity: "critical",
Expand All @@ -27,7 +27,7 @@ func CheckPersistentVolumeClaims(resources *v1.PersistentVolumeClaimList) (resul
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/persistentvolumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func CheckPersistentVolumes(resources *v1.PersistentVolumeList) (results symptom
for _, volume := range resources.Items {
log.Debug(fmt.Sprintf("Examining PersistentVolume %s", volume.Name))

if volume.Status.Phase != "Bound" && time.Now().Sub(volume.CreationTimestamp.Time).Minutes() > 5 {
if volume.Status.Phase != "Bound" && time.Since(volume.CreationTimestamp.Time).Minutes() > 5 {
results.Add(symptoms.Symptom{
Message: "older than 5 minutes and status is not bound",
Severity: "critical",
Expand All @@ -27,7 +27,7 @@ func CheckPersistentVolumes(resources *v1.PersistentVolumeList) (results symptom
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
12 changes: 6 additions & 6 deletions pkg/checkup/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
log.Debug(fmt.Sprintf("Examining Pod %s/%s", pod.Namespace, pod.Name))

if pod.Status.Phase == "Succeeded" {
return
continue
}

if pod.Status.Phase != "Running" {
Expand All @@ -45,9 +45,9 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {

for _, scs := range pod.Status.ContainerStatuses {
if !scs.Ready {
if time.Now().Sub(pod.Status.StartTime.Time).Minutes() < 3 {
if time.Since(pod.Status.StartTime.Time).Minutes() < 3 {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' is not ready but pod started %.1f mins ago", scs.Name, pod.Status.StartTime.Sub(time.Now()).Minutes()),
Message: fmt.Sprintf("container '%s' is not ready but pod started %.1f mins ago", scs.Name, time.Since(pod.Status.StartTime.Time).Minutes()),
Severity: "warning",
ResourceName: pod.Name,
ResourceType: resourceType,
Expand All @@ -65,7 +65,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
}

if scs.RestartCount != 0 {
if time.Now().Sub(scs.LastTerminationState.Terminated.FinishedAt.Time).Hours() > 1 {
if time.Since(scs.LastTerminationState.Terminated.FinishedAt.Time).Hours() > 1 {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' has been restarted %d times", scs.Name, scs.RestartCount),
Severity: "warning",
Expand All @@ -77,7 +77,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
results.Add(symptoms.Symptom{
Message: fmt.Sprintf("container '%s' was restarted %.1f mins ago: %d (exit code) %s (reason)",
scs.Name,
scs.LastTerminationState.Terminated.FinishedAt.Sub(time.Now()).Minutes(),
time.Since(scs.LastTerminationState.Terminated.FinishedAt.Time).Minutes(),
scs.LastTerminationState.Terminated.ExitCode,
scs.LastTerminationState.Terminated.Reason,
),
Expand All @@ -101,7 +101,7 @@ func CheckPods(resources *v1.PodList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
4 changes: 2 additions & 2 deletions pkg/checkup/pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestCheckPodsBadContainerStatuses(t *testing.T) {
result := CheckPods(&dummyResources)

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "container 'c1' is not ready but pod started -0.0 mins ago", result.Symptoms[0].Message)
assert.Equal(t, "container 'c1' is not ready but pod started 0.0 mins ago", result.Symptoms[0].Message)
assert.Equal(t, "warning", result.Symptoms[0].Severity)
}

Expand Down Expand Up @@ -220,7 +220,7 @@ func TestCheckPodsWithRestarts(t *testing.T) {
result := CheckPods(&dummyResources)

assert.Len(t, result.Symptoms, 1)
assert.Equal(t, "container 'c1' was restarted -0.0 mins ago: 1 (exit code) Crashed (reason)", result.Symptoms[0].Message)
assert.Equal(t, "container 'c1' was restarted 0.0 mins ago: 1 (exit code) Crashed (reason)", result.Symptoms[0].Message)
assert.Equal(t, "critical", result.Symptoms[0].Severity)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/checkup/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func CheckServices(resources *v1.ServiceList) (results symptoms.SymptomList) {
}
}

log.PrintEnd(len(resources.Items), len(results.Symptoms))
log.PrintEnd(len(resources.Items), results.CountSymptomsSeverity())

return results
}
12 changes: 12 additions & 0 deletions pkg/checkup/symptoms/symptoms.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ func (l *SymptomList) Add(s Symptom) {
l.Symptoms = append(l.Symptoms, s)
}

// CountSymptomsSeverity tallies the symptoms held in the list by severity.
//
// The returned array has two slots: index 0 is the number of symptoms whose
// Severity is exactly "critical", and index 1 is the number of all remaining
// symptoms (any Severity value other than "critical" lands here).
func (l *SymptomList) CountSymptomsSeverity() (c [2]int) {
	for i := range l.Symptoms {
		// Default to the non-critical slot; only an exact "critical" match
		// is counted in slot 0.
		slot := 1
		if l.Symptoms[i].Severity == "critical" {
			slot = 0
		}
		c[slot]++
	}

	return c
}

type ContainerSymptom struct {
Name string
Severity string `validate:"oneof=warning critical"`
Expand Down
7 changes: 2 additions & 5 deletions pkg/doctor/checkup.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,18 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func DoCheckUp(cCtx *cli.Context) error {
func DoCheckUp(cCtx *cli.Context) {
log.Setup(cCtx.Bool("debug"), cCtx.Bool("warning-symptoms"))
log.Debug(fmt.Sprintf("Connected to cluster from context %s running version %s", kubernetes.ContextName, kubernetes.ServerVersion))

checkNonNamespaced := cCtx.Bool("non-namespaced")
checkNonNamespaced := cCtx.Bool("non-namespaced-resources")
namespace := cCtx.String("namespace")
labelSelector := cCtx.String("label-selector")

if checkNonNamespaced {
log.LogSymptoms(checkup.CheckNodes(kubernetes.GetNodes()))
log.LogSymptoms(checkup.CheckPersistentVolumes(kubernetes.GetPersistentVolumes()))
log.LogSymptoms(checkup.KubeApiHealthStatuses(kubernetes.GetKubeApiHealth()))
return nil
}

log.LogSymptoms(checkup.CheckDaemonSets(kubernetes.GetDaemonSets(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
Expand All @@ -34,6 +33,4 @@ func DoCheckUp(cCtx *cli.Context) error {
log.LogSymptoms(checkup.CheckPersistentVolumeClaims(kubernetes.GetPersistentVolumeClaims(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
log.LogSymptoms(checkup.CheckPods(kubernetes.GetPods(namespace, metav1.ListOptions{LabelSelector: labelSelector})))
log.LogSymptoms(checkup.CheckServices(kubernetes.GetServices(namespace, metav1.ListOptions{LabelSelector: labelSelector})))

return nil
}
Loading

0 comments on commit 95e23b5

Please sign in to comment.