Skip to content

Commit

Permalink
Merge branch 'main' into feat/makefile-supre
Browse files Browse the repository at this point in the history
  • Loading branch information
cubxxw committed Apr 21, 2023
2 parents 13b2a13 + 9530543 commit 898c824
Show file tree
Hide file tree
Showing 15 changed files with 156 additions and 114 deletions.
16 changes: 0 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,22 +269,6 @@ The Kubernetes system is trying to scale a StatefulSet named fake-deployment usi

</details>

## Upcoming major milestones

- [ ] Multiple AI backend support
- [ ] Custom AI/ML model backend support
- [ ] Custom analyzers

## What about kubectl-ai?

The kubectl-ai [project](https://github.com/sozercan/kubectl-ai) uses AI to create manifests and apply them to the
cluster. That is not what we are trying to do here; kubectl-ai focuses on writing YAML manifests.

K8sgpt is focused on triaging and diagnosing issues in your cluster. It is a tool that helps SRE, Platform & DevOps engineers
understand what is going on in their cluster by cutting through the noise of logs and multiple tools to find
the root cause of an issue.


## Configuration

`k8sgpt` stores config data in the `$XDG_CONFIG_HOME/k8sgpt/k8sgpt.yaml` file. The data is stored in plain text, including your OpenAI key.
Expand Down
9 changes: 5 additions & 4 deletions cmd/analyze/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ var AnalyzeCmd = &cobra.Command{
os.Exit(1)
}

err = config.RunAnalysis()
if err != nil {
color.Red("Error: %v", err)
os.Exit(1)
analysisErrors := config.RunAnalysis()
if len(analysisErrors) != 0 {
for _, err := range analysisErrors {
color.Red("Error: %s", err)
}
}

if explain {
Expand Down
4 changes: 2 additions & 2 deletions container/manifests/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ spec:
cpu: "1"
memory: "512Mi"
requests:
cpu: "0.5"
memory: "256Mi"
cpu: "0.2"
memory: "156Mi"
env:
- name: K8SGPT_MODEL
value: "gpt-3.5-turbo"
Expand Down
2 changes: 2 additions & 0 deletions container/manifests/service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ kind: Service
metadata:
name: k8sgpt-service
namespace: k8sgpt
labels:
app: k8sgpt
spec:
selector:
app: k8sgpt
Expand Down
15 changes: 15 additions & 0 deletions container/manifests/serviceMonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ServiceMonitor manifest named k8sgpt-service-monitor in the k8sgpt namespace.
# NOTE(review): indentation is flattened in this view, so the nesting implied by
# the comments below is inferred from key order — confirm against the real file.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: k8sgpt
name: k8sgpt-service-monitor
namespace: k8sgpt
spec:
endpoints:
# Single endpoint entry: path /metrics on the port named "http".
- honorLabels: true
path: /metrics
port: http
selector:
# Presumably matches Services carrying the label app: k8sgpt — verify.
matchLabels:
app: k8sgpt
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/client_golang v1.15.0
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/robfig/cron/v3 v3.0.1
github.com/rubenv/sql-migrate v1.3.1 // indirect
Expand Down
53 changes: 6 additions & 47 deletions go.sum

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion pkg/ai/iai.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ import (
)

type IAI interface {
Configure(token string, model string, language string) error
Configure(config IAIConfig, language string) error
GetCompletion(ctx context.Context, prompt string) (string, error)
Parse(ctx context.Context, prompt []string, nocache bool) (string, error)
GetName() string
}

// IAIConfig is the configuration accepted by IAI.Configure. It exposes the
// backend credentials and model name through accessor methods.
type IAIConfig interface {
// GetPassword returns the configured password as a string.
GetPassword() string
// GetModel returns the configured model name as a string.
GetModel() string
}

func NewClient(provider string) IAI {
switch provider {
case "openai":
Expand All @@ -31,3 +36,11 @@ type AIProvider struct {
Model string `mapstructure:"model"`
Password string `mapstructure:"password"`
}

// GetPassword returns the provider's Password field.
func (p *AIProvider) GetPassword() string {
return p.Password
}

// GetModel returns the provider's Model field.
func (p *AIProvider) GetModel() string {
return p.Model
}
5 changes: 3 additions & 2 deletions pkg/ai/noopai.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ type NoOpAIClient struct {
model string
}

func (c *NoOpAIClient) Configure(token string, model string, language string) error {
func (c *NoOpAIClient) Configure(config IAIConfig, language string) error {
token := config.GetPassword()
c.language = language
c.client = fmt.Sprintf("I am a noop client with the token %s ", token)
c.model = model
c.model = config.GetModel()
return nil
}

Expand Down
15 changes: 5 additions & 10 deletions pkg/ai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,22 @@ import (
"github.com/sashabaranov/go-openai"
)

const (
default_prompt = "Simplify the following Kubernetes error message and provide a solution in %s: %s"
prompt_a = "Read the following input %s and provide possible scenarios for remediation in %s"
prompt_b = "Considering the following input from the Kubernetes resource %s and the error message %s, provide possible scenarios for remediation in %s"
prompt_c = "Reading the following %s error message and it's accompanying log message %s, how would you simplify this message?"
)

type OpenAIClient struct {
client *openai.Client
language string
model string
}

func (c *OpenAIClient) Configure(token string, model string, language string) error {
client := openai.NewClient(token)
func (c *OpenAIClient) Configure(config IAIConfig, language string) error {
token := config.GetPassword()
defaultConfig := openai.DefaultConfig(token)
client := openai.NewClientWithConfig(defaultConfig)
if client == nil {
return errors.New("error creating OpenAI client")
}
c.language = language
c.client = client
c.model = model
c.model = config.GetModel()
return nil
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/ai/prompts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package ai

// Prompt templates for package ai. Every %s is a positional placeholder, so
// arguments must be supplied in the order documented on each constant.
const (
// default_prompt takes two values: the first follows "provide a solution in"
// (presumably the response language — confirm against the caller), the
// second is the Kubernetes error message.
default_prompt = "Simplify the following Kubernetes error message and provide a solution in %s: %s"
// prompt_a takes two values: the input to read, then the value that follows
// "remediation in" (presumably the response language — confirm).
prompt_a = "Read the following input %s and provide possible scenarios for remediation in %s"
// prompt_b takes three values: the Kubernetes resource input, the error
// message, then the value that follows "remediation in" (presumably the
// response language — confirm).
prompt_b = "Considering the following input from the Kubernetes resource %s and the error message %s, provide possible scenarios for remediation in %s"
// prompt_c takes two values: one placed before "error message" and one
// placed after "log message". It has no placeholder after a trailing "in".
prompt_c = "Reading the following %s error message and it's accompanying log message %s, how would you simplify this message?"
)
21 changes: 12 additions & 9 deletions pkg/analysis/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"os"
"reflect"
"strings"

"github.com/fatih/color"
Expand Down Expand Up @@ -68,7 +69,7 @@ func NewAnalysis(backend string, language string, filters []string, namespace st
}

aiClient := ai.NewClient(aiProvider.Name)
if err := aiClient.Configure(aiProvider.Password, aiProvider.Model, language); err != nil {
if err := aiClient.Configure(&aiProvider, language); err != nil {
color.Red("Error: %v", err)
return nil, err
}
Expand All @@ -95,7 +96,7 @@ func NewAnalysis(backend string, language string, filters []string, namespace st
}, nil
}

func (a *Analysis) RunAnalysis() error {
func (a *Analysis) RunAnalysis() []error {
activeFilters := viper.GetStringSlice("active_filters")

analyzerMap := analyzer.GetAnalyzerMap()
Expand All @@ -107,16 +108,18 @@ func (a *Analysis) RunAnalysis() error {
AIClient: a.AIClient,
}

var errorList []error

// if there are no filters selected and no active_filters then run all of them
if len(a.Filters) == 0 && len(activeFilters) == 0 {
for _, analyzer := range analyzerMap {
results, err := analyzer.Analyze(analyzerConfig)
if err != nil {
return err
errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", reflect.TypeOf(analyzer).Name(), err)))
}
a.Results = append(a.Results, results...)
}
return nil
return errorList
}

// if the filters flag is specified
Expand All @@ -125,27 +128,27 @@ func (a *Analysis) RunAnalysis() error {
if analyzer, ok := analyzerMap[filter]; ok {
results, err := analyzer.Analyze(analyzerConfig)
if err != nil {
return err
errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", filter, err)))
}
a.Results = append(a.Results, results...)
} else {
return errors.New(fmt.Sprintf("\"%s\" filter does not exist. Please run k8sgpt filters list.", filter))
errorList = append(errorList, errors.New(fmt.Sprintf("\"%s\" filter does not exist. Please run k8sgpt filters list.", filter)))
}
}
return nil
return errorList
}

// use active_filters
for _, filter := range activeFilters {
if analyzer, ok := analyzerMap[filter]; ok {
results, err := analyzer.Analyze(analyzerConfig)
if err != nil {
return err
errorList = append(errorList, errors.New(fmt.Sprintf("[%s] %s", filter, err)))
}
a.Results = append(a.Results, results...)
}
}
return nil
return errorList
}

func (a *Analysis) GetAIResults(output string, anonymize bool) error {
Expand Down
19 changes: 18 additions & 1 deletion pkg/analyzer/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
}
}

// Check through container status to check for crashes
// Check through container status to check for crashes or unready
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.State.Waiting != nil {
if containerStatus.State.Waiting.Reason == "CrashLoopBackOff" || containerStatus.State.Waiting.Reason == "ImagePullBackOff" {
Expand All @@ -70,6 +70,23 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) {
})
}
}
} else {
// when pod is Running but its ReadinessProbe fails
if containerStatus.Ready == false && pod.Status.Phase == "Running" {
// parse the event log and append details
evt, err := FetchLatestEvent(a.Context, a.Client, pod.Namespace, pod.Name)
if err != nil || evt == nil {
continue
}
if evt.Reason == "Unhealthy" && evt.Message != "" {
failures = append(failures, common.Failure{
Text: evt.Message,
Sensitive: []common.Sensitive{},
})

}

}
}
}
if len(failures) > 0 {
Expand Down
70 changes: 56 additions & 14 deletions pkg/analyzer/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,65 @@ import (

func TestPodAnalyzer(t *testing.T) {

clientset := fake.NewSimpleClientset(&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "example",
Namespace: "default",
Annotations: map[string]string{},
clientset := fake.NewSimpleClientset(
&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "example",
Namespace: "default",
Annotations: map[string]string{},
},
Status: v1.PodStatus{
Phase: v1.PodPending,
Conditions: []v1.PodCondition{
{
Type: v1.PodScheduled,
Reason: "Unschedulable",
Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodPending,
Conditions: []v1.PodCondition{
{
Type: v1.PodScheduled,
Reason: "Unschedulable",
Message: "0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.",
&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "example2",
Namespace: "default",
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "example2",
Ready: false,
},
},
Conditions: []v1.PodCondition{
{
Type: v1.ContainersReady,
Reason: "ContainersNotReady",
Message: "containers with unready status: [example2]",
},
},
},
},
})
// simulate event: 30s Warning Unhealthy pod/my-nginx-7fb4dbcf47-4ch4w Readiness probe failed: bash: xxxx: command not found
&v1.Event{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
InvolvedObject: v1.ObjectReference{
Kind: "Pod",
Name: "example2",
Namespace: "default",
UID: "differentUid",
APIVersion: "v1",
},
Reason: "Unhealthy",
Message: "readiness probe failed: the detail reason here ...",
Source: v1.EventSource{Component: "eventTest"},
Count: 1,
Type: v1.EventTypeWarning,
})

config := common.Analyzer{
Client: &kubernetes.Client{
Expand All @@ -45,7 +87,7 @@ func TestPodAnalyzer(t *testing.T) {
if err != nil {
t.Error(err)
}
assert.Equal(t, len(analysisResults), 1)
assert.Equal(t, len(analysisResults), 2)
}

func TestPodAnalyzerNamespaceFiltering(t *testing.T) {
Expand Down
12 changes: 7 additions & 5 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,14 @@ func (s *Config) analyzeHandler(w http.ResponseWriter, r *http.Request) {
return
}

err = config.RunAnalysis()
if err != nil {
color.Red("Error: %v", err)
analysisErrors := config.RunAnalysis()
if analysisErrors != nil {
var errorMessage string
for _, err := range analysisErrors {
errorMessage += err.Error() + "\n"
}
http.Error(w, errorMessage, http.StatusInternalServerError)
health.Failure++
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

if explain {
Expand Down

0 comments on commit 898c824

Please sign in to comment.