Merge branch 'main' into feat/makefile-supre

k8sgpt-ai · Apr 21, 2023 · 898c824 · 898c824
2 parents 13b2a13 + 9530543
commit 898c824
Show file tree

Hide file tree

Showing 15 changed files with 156 additions and 114 deletions.
diff --git a/README.md b/README.md
@@ -269,22 +269,6 @@ The Kubernetes system is trying to scale a StatefulSet named fake-deployment usi
 
 </details>
 
-## Upcoming major milestones
-
-- [ ] Multiple AI backend support
-- [ ] Custom AI/ML model backend support
-- [ ] Custom analyzers
-
-## What about kubectl-ai?
-
-The kubectl-ai [project](https://github.com/sozercan/kubectl-ai) uses AI to create manifests and apply them to the
-cluster. It is not what we are trying to do here, it is focusing on writing YAML manifests.
-
-K8sgpt is focused on triaging and diagnosing issues in your cluster. It is a tool for SRE, Platform & DevOps engineers
-to help them understand what is going on in their cluster. Cutting through the noise of logs and multiple tools to find
-the root cause of an issue.
-
-
 ## Configuration
 
 `k8sgpt` stores config data in the `$XDG_CONFIG_HOME/k8sgpt/k8sgpt.yaml` file. The data is stored in plain text, including your OpenAI key.

diff --git a/cmd/analyze/analyze.go b/cmd/analyze/analyze.go
@@ -36,10 +36,11 @@ var AnalyzeCmd = &cobra.Command{
 			os.Exit(1)
 		}
 
-		err = config.RunAnalysis()
-		if err != nil {
-			color.Red("Error: %v", err)
-			os.Exit(1)
+		analysisErrors := config.RunAnalysis()
+		if len(analysisErrors) != 0 {
+			for _, err := range analysisErrors {
+				color.Red("Error: %s", err)
+			}
 		}
 
 		if explain {

diff --git a/container/manifests/deployment.yaml b/container/manifests/deployment.yaml
@@ -28,8 +28,8 @@ spec:
             cpu: "1"
             memory: "512Mi"
           requests:
-            cpu: "0.5"
-            memory: "256Mi"
+            cpu: "0.2"
+            memory: "156Mi"
         env:
         - name: K8SGPT_MODEL
           value: "gpt-3.5-turbo"

diff --git a/container/manifests/service.yaml b/container/manifests/service.yaml
@@ -3,6 +3,8 @@ kind: Service
 metadata:
   name: k8sgpt-service
   namespace: k8sgpt
+  labels:
+     app: k8sgpt
 spec:
   selector:
     app: k8sgpt

diff --git a/container/manifests/serviceMonitor.yaml b/container/manifests/serviceMonitor.yaml
@@ -0,0 +1,15 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    app: k8sgpt
+  name: k8sgpt-service-monitor
+  namespace: k8sgpt
+spec:
+  endpoints:
+  - honorLabels: true
+    path: /metrics
+    port: http
+  selector:
+    matchLabels:
+      app: k8sgpt
diff --git a/go.mod b/go.mod
@@ -115,10 +115,10 @@ require (
 	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/prometheus/client_golang v1.14.0
+	github.com/prometheus/client_golang v1.15.0
 	github.com/prometheus/client_model v0.3.0 // indirect
-	github.com/prometheus/common v0.37.0 // indirect
-	github.com/prometheus/procfs v0.8.0 // indirect
+	github.com/prometheus/common v0.42.0 // indirect
+	github.com/prometheus/procfs v0.9.0 // indirect
 	github.com/rivo/uniseg v0.4.4 // indirect
 	github.com/robfig/cron/v3 v3.0.1
 	github.com/rubenv/sql-migrate v1.3.1 // indirect

diff --git a/go.sum b/go.sum
diff --git a/pkg/ai/iai.go b/pkg/ai/iai.go
@@ -5,12 +5,17 @@ import (
 )
 
 type IAI interface {
-	Configure(token string, model string, language string) error
+	Configure(config IAIConfig, language string) error
 	GetCompletion(ctx context.Context, prompt string) (string, error)
 	Parse(ctx context.Context, prompt []string, nocache bool) (string, error)
 	GetName() string
 }
 
+type IAIConfig interface {
+	GetPassword() string
+	GetModel() string
+}
+
 func NewClient(provider string) IAI {
 	switch provider {
 	case "openai":
@@ -31,3 +36,11 @@ type AIProvider struct {
 	Model    string `mapstructure:"model"`
 	Password string `mapstructure:"password"`
 }
+
+func (p *AIProvider) GetPassword() string {
+	return p.Password
+}
+
+func (p *AIProvider) GetModel() string {
+	return p.Model
+}
diff --git a/pkg/ai/noopai.go b/pkg/ai/noopai.go
@@ -17,10 +17,11 @@ type NoOpAIClient struct {
 	model    string
 }
 
-func (c *NoOpAIClient) Configure(token string, model string, language string) error {
+func (c *NoOpAIClient) Configure(config IAIConfig, language string) error {
+	token := config.GetPassword()
 	c.language = language
 	c.client = fmt.Sprintf("I am a noop client with the token %s ", token)
-	c.model = model
+	c.model = config.GetModel()
 	return nil
 }
 

diff --git a/pkg/ai/openai.go b/pkg/ai/openai.go
@@ -15,27 +15,22 @@ import (
 	"github.com/sashabaranov/go-openai"
 )
 
-const (
-	default_prompt = "Simplify the following Kubernetes error message and provide a solution in %s: %s"
-	prompt_a       = "Read the following input %s and provide possible scenarios for remediation in %s"
-	prompt_b       = "Considering the following input from the Kubernetes resource %s and the error message %s, provide possible scenarios for remediation in %s"
-	prompt_c       = "Reading the following %s error message and it's accompanying log message %s, how would you simplify this message?"
-)
-
 type OpenAIClient struct {
 	client   *openai.Client
 	language string
 	model    string
 }
 
-func (c *OpenAIClient) Configure(token string, model string, language string) error {
-	client := openai.NewClient(token)
+func (c *OpenAIClient) Configure(config IAIConfig, language string) error {
+	token := config.GetPassword()
+	defaultConfig := openai.DefaultConfig(token)
+	client := openai.NewClientWithConfig(defaultConfig)
 	if client == nil {
 		return errors.New("error creating OpenAI client")
 	}
 	c.language = language
 	c.client = client
-	c.model = model
+	c.model = config.GetModel()
 	return nil
 }
 

diff --git a/pkg/ai/prompts.go b/pkg/ai/prompts.go
@@ -0,0 +1,8 @@
+package ai
+
+const (
+	default_prompt = "Simplify the following Kubernetes error message and provide a solution in %s: %s"
+	prompt_a       = "Read the following input %s and provide possible scenarios for remediation in %s"
+	prompt_b       = "Considering the following input from the Kubernetes resource %s and the error message %s, provide possible scenarios for remediation in %s"
+	prompt_c       = "Reading the following %s error message and it's accompanying log message %s, how would you simplify this message?"
+)
diff --git a/pkg/analysis/analysis.go b/pkg/analysis/analysis.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"reflect"
 	"strings"
 
 	"github.com/fatih/color"
@@ -68,7 +69,7 @@ func NewAnalysis(backend string, language string, filters []string, namespace st
 	}
 
 	aiClient := ai.NewClient(aiProvider.Name)
-	if err := aiClient.Configure(aiProvider.Password, aiProvider.Model, language); err != nil {
+	if err := aiClient.Configure(&aiProvider, language); err != nil {
 		color.Red("Error: %v", err)
 		return nil, err
 	}
@@ -95,7 +96,7 @@ func NewAnalysis(backend string, language string, filters []string, namespace st
 	}, nil
 }
 
-func (a *Analysis) RunAnalysis() error {
+func (a *Analysis) RunAnalysis() []error {
 	activeFilters := viper.GetStringSlice("active_filters")
 
 	analyzerMap := analyzer.GetAnalyzerMap()
@@ -107,16 +108,18 @@ func (a *Analysis) RunAnalysis() error {
 		AIClient:  a.AIClient,
 	}
 
+	var errorList []error
+
 	// if there are no filters selected and no active_filters then run all of them
 	if len(a.Filters) == 0 && len(activeFilters) == 0 {
 		for _, analyzer := range analyzerMap {
 			results, err := analyzer.Analyze(analyzerConfig)
 			if err != nil {
-				return err
+				errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", reflect.TypeOf(analyzer).Name(), err)))
 			}
 			a.Results = append(a.Results, results...)
 		}
-		return nil
+		return errorList
 	}
 
 	// if the filters flag is specified
@@ -125,27 +128,27 @@ func (a *Analysis) RunAnalysis() error {
 			if analyzer, ok := analyzerMap[filter]; ok {
 				results, err := analyzer.Analyze(analyzerConfig)
 				if err != nil {
-					return err
+					errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", filter, err)))
 				}
 				a.Results = append(a.Results, results...)
 			} else {
-				return errors.New(fmt.Sprintf("\"%s\" filter does not exist. Please run k8sgpt filters list.", filter))
+				errorList = append(errorList, errors.New(fmt.Sprintf("\"%s\" filter does not exist. Please run k8sgpt filters list.", filter)))
 			}
 		}
-		return nil
+		return errorList
 	}
 
 	// use active_filters
 	for _, filter := range activeFilters {
 		if analyzer, ok := analyzerMap[filter]; ok {
 			results, err := analyzer.Analyze(analyzerConfig)
 			if err != nil {
-				return err
+				errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", filter, err)))
 			}
 			a.Results = append(a.Results, results...)
 		}
 	}
-	return nil
+	return errorList
 }
 
 func (a *Analysis) GetAIResults(output string, anonymize bool) error {

diff --git a/pkg/analyzer/pod.go b/pkg/analyzer/pod.go
@@ -44,7 +44,7 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 			}
 		}
 
-		// Check through container status to check for crashes
+		// Check through container status to check for crashes or unready
 		for _, containerStatus := range pod.Status.ContainerStatuses {
 			if containerStatus.State.Waiting != nil {
 				if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" || containerStatus.State.Waiting.Reason == "ImagePullBackOff" {
@@ -70,6 +70,23 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
 						})
 					}
 				}
+			} else {
+				// when pod is Running but its ReadinessProbe fails
+				if containerStatus.Ready == false && pod.Status.Phase == "Running" {
+					// parse the event log and append details
+					evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
+					if err != nil || evt == nil {
+						continue
+					}
+					if evt.Reason == "Unhealthy" && evt.Message != "" {
+						failures = append(failures, common.Failure{
+							Text:      evt.Message,
+							Sensitive: []common.Sensitive{},
+						})
+
+					}
+
+				}
 			}
 		}
 		if len(failures) > 0 {

diff --git a/pkg/analyzer/pod_test.go b/pkg/analyzer/pod_test.go
@@ -14,23 +14,65 @@ import (
 
 func TestPodAnalyzer(t *testing.T) {
 
-	clientset := fake.NewSimpleClientset(&v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:        "example",
-			Namespace:   "default",
-			Annotations: map[string]string{},
+	clientset := fake.NewSimpleClientset(
+		&v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:        "example",
+				Namespace:   "default",
+				Annotations: map[string]string{},
+			},
+			Status: v1.PodStatus{
+				Phase: v1.PodPending,
+				Conditions: []v1.PodCondition{
+					{
+						Type:    v1.PodScheduled,
+						Reason:  "Unschedulable",
+						Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
+					},
+				},
+			},
 		},
-		Status: v1.PodStatus{
-			Phase: v1.PodPending,
-			Conditions: []v1.PodCondition{
-				{
-					Type:    v1.PodScheduled,
-					Reason:  "Unschedulable",
-					Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
+		&v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "example2",
+				Namespace: "default",
+			},
+			Status: v1.PodStatus{
+				Phase: v1.PodRunning,
+				ContainerStatuses: []v1.ContainerStatus{
+					{
+						Name:  "example2",
+						Ready: false,
+					},
+				},
+				Conditions: []v1.PodCondition{
+					{
+						Type:    v1.ContainersReady,
+						Reason:  "ContainersNotReady",
+						Message: "containers with unready status: [example2]",
+					},
 				},
 			},
 		},
-	})
+		// simulate event: 30s         Warning   Unhealthy              pod/my-nginx-7fb4dbcf47-4ch4w                         Readiness probe failed: bash: xxxx: command not found
+		&v1.Event{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "foo",
+				Namespace: "default",
+			},
+			InvolvedObject: v1.ObjectReference{
+				Kind:       "Pod",
+				Name:       "example2",
+				Namespace:  "default",
+				UID:        "differentUid",
+				APIVersion: "v1",
+			},
+			Reason:  "Unhealthy",
+			Message: "readiness probe failed: the detail reason here ...",
+			Source:  v1.EventSource{Component: "eventTest"},
+			Count:   1,
+			Type:    v1.EventTypeWarning,
+		})
 
 	config := common.Analyzer{
 		Client: &kubernetes.Client{
@@ -45,7 +87,7 @@ func TestPodAnalyzer(t *testing.T) {
 	if err != nil {
 		t.Error(err)
 	}
-	assert.Equal(t, len(analysisResults), 1)
+	assert.Equal(t, len(analysisResults), 2)
 }
 
 func TestPodAnalyzerNamespaceFiltering(t *testing.T) {

diff --git a/pkg/server/server.go b/pkg/server/server.go
@@ -55,12 +55,14 @@ func (s *Config) analyzeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	err = config.RunAnalysis()
-	if err != nil {
-		color.Red("Error: %v", err)
+	analysisErrors := config.RunAnalysis()
+	if analysisErrors != nil {
+		var errorMessage string
+		for _, err := range analysisErrors {
+			errorMessage += err.Error() + "\n"
+		}
+		http.Error(w, errorMessage, http.StatusInternalServerError)
 		health.Failure++
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
 	}
 
 	if explain {