kubeshop · pkosiec · Jul 9, 2024 · Jun 28, 2024 · Jul 9, 2024
diff --git a/hack/assistant-setup/file-search.ts b/hack/assistant-setup/file-search.ts
@@ -22,6 +22,9 @@ export async function setupFileSearch(client: OpenAI): Promise<string> {
   const vectorStore = await client.beta.vectorStores.create({
     name: vectorStoreName,
   });
+  console.log(
+    `Created vector store '${vectorStore.name}' (ID: ${vectorStore.id})`,
+  );
 
   console.log(
     "Uploading files to vector store and waiting for the file batch processing to complete. This might take a few minutes...",

diff --git a/hack/assistant-setup/index.ts b/hack/assistant-setup/index.ts
@@ -47,7 +47,7 @@ const instructions = dedent`
 
 async function main() {
   let cfg: Config = {
-    projectID: "",
+    projectID: undefined,
     assistantID: "",
   };
   const assistantEnv = process.env["ASSISTANT_ENV"];

diff --git a/hack/assistant-setup/tools.ts b/hack/assistant-setup/tools.ts
@@ -237,5 +237,98 @@ export function setupTools(): Array<AssistantTool> {
         },
       },
     },
+    {
+      type: "function",
+      function: {
+        name: "kubescapeScanCluster",
+        description: dedent`
+          It serves as an all-in-one tool for vulnerability and misconfiguration scanning for the whole Kubernetes cluster.
+          Kubescape includes misconfiguration and vulnerability scanning as well as risk analysis and security compliance indicators.
+          All results are presented in context and users get many cues on what to do based on scan results.
+          It saves Kubernetes users and admins precious time, effort, and resources.
+          `,
+      },
+    },
+    {
+      type: "function",
+      function: {
+        name: "kubescapeScanWorkload",
+        description: dedent`
+            Allows you to comprehensively report on the security posture of individual workloads running in a Kubernetes cluster.
+            This includes both misconfiguration and image vulnerability scanning.
+            This scan results in information that gives a 360-degree assessment of your workload's security posture.
+
+            Usage:
+            # Scan a workload
+            kubescape scan workload {kind}/{name}
+            # Scan a workload in a specific namespace
+            kubescape scan workload {kind}/{name} --namespace {namespace}
+            `,
+        parameters: {
+          type: "object",
+          properties: {
+            namespace: {
+              type: "string",
+              description: "Kubernetes namespace, e.g. kube-system",
+            },
+            resource_kind: {
+              type: "string",
+              description:
+                "Kubernetes workload kind, e.g. Deployment or StatefulSet.",
+            },
+            resource_name: {
+              type: "string",
+              description: "Kubernetes workload name, e.g. botkube-api-server.",
+            },
+          },
+          required: ["resource_kind", "resource_name"],
+        },
+      },
+    },
+    {
+      type: "function",
+      function: {
+        name: "kubescapeScanImage",
+        description: dedent`
+            Scan an image for vulnerabilities.
+
+            Usage:
+            kubescape scan image "nginx"
+            kubescape scan image "nginx:latest"
+            `,
+        parameters: {
+          type: "object",
+          properties: {
+            image: {
+              type: "string",
+              description: "Image name with tag, e.g. nginx:latest",
+            },
+          },
+          required: ["image"],
+        },
+      },
+    },
+    {
+      type: "function",
+      function: {
+        name: "kubescapeScanControl",
+        description: dedent`
+            Allows you to get details about a given Kubescape issue based on ID like "C-0188" or "C-0007".
+
+            Usage:
+            kubescape scan control {control ID}
+            `,
+        parameters: {
+          type: "object",
+          properties: {
+            control: {
+              type: "string",
+              description: "Control ID, e.g. C-0188.",
+            },
+          },
+          required: ["control"],
+        },
+      },
+    },
   ];
 }
diff --git a/internal/source/ai-brain/assistant.go b/internal/source/ai-brain/assistant.go
@@ -10,6 +10,8 @@ import (
 	"strings"
 	"time"
 
+	"github.com/MakeNowJust/heredoc"
+
 	"github.com/kubeshop/botkube/pkg/ptr"
 
 	"github.com/kubeshop/botkube-cloud-plugins/internal/otelx"
@@ -28,52 +30,61 @@ import (
 )
 
 const (
-	cacheTTL                  = 8 * time.Hour
-	openAIPollInterval        = 2 * time.Second
-	maxToolExecutionRetries   = 3
-	quotaExceededErrCode      = "quota_exceeded"
-	tracerName                = "source.aibrain"
-	serviceName               = "botkube-plugins-source-ai-brain"
-	clusterScanSubcommandName = "scan"
-
-	clusterScanPrompt = `
-Scan the Kubernetes cluster for critical issues that could significantly impact the cluster's health, stability, or security.
-Focus on problems that may not be immediately apparent through events or standard monitoring.
-
-Provide a concise overview of the scan results, including the total number of
-critical issues found. If there were no issues found for a specific check, do
-not include that section in the report. List the Kubernetes objects directly
-affected by the issue. Make sure that your checks are relevant to the current
-state of the cluster, do not include resources that no longer exist.
-
-Summary section needs to be at the top of the report, followed by specific checks.
-
-Specific Checks:
-
-Pod Health:
-Identify pods in a crash-loop backoff state with a high restart count.
-Identify pods that have been OOMKilled (Out of Memory Killed) multiple times.
-Look for pods stuck in a pending state for an extended period.
-Resource Utilization:
-Identify nodes or pods with critically high CPU or memory usage (e.g., above 90% of limits).
-Check for critical resource starvation issues affecting multiple pods or namespaces.
-Configuration:
-Look for pods running with very insecure capabilities (e.g., ALL, NET_RAW, SYS_ADMIN).
-Identify pods using deprecated or insecure container images.
-Check for misconfigured network policies that could expose sensitive services.
-Networking:
-Identify pods or services experiencing significant network latency or packet loss.
-Check for network partitions or connectivity issues between critical components.
-
-Additional Guidance for the LLM Agent:
-
-Prioritize issues that pose the most immediate threat to the cluster's stability, performance, or security.
-Filter out informational or low-severity issues that are unlikely to cause major problems.
-Be as specific as possible in the descriptions. Do not exceed 2000 characters in your response.
-`
+	cacheTTL                          = 8 * time.Hour
+	openAIPollInterval                = 2 * time.Second
+	maxToolExecutionRetries           = 3
+	quotaExceededErrCode              = "quota_exceeded"
+	serviceName                       = "botkube-plugins-source-ai-brain" // also passed to otel.Tracer as the tracer name
+	temperature               float32 = 0.1                               // passed as Temperature when creating a thread run
+	msgSplitPattern                   = "\n\n---\n\n"                     // separator requested in the scan prompt; completed answers are split on it
+	clusterScanSubcommandName         = "scan"
+	multipleMessagesDelay             = 500 * time.Millisecond // pause after sending each part of a split answer
 )
 
-var temperature float32 = 0.1
+var ( // clusterScanPrompt is the cluster-scan prompt text; heredoc.Doc strips the common leading indentation.
+	clusterScanPrompt = heredoc.Doc(`
+		Scan the Kubernetes cluster for critical issues that could significantly impact the cluster's health, stability, or security.
+		Focus on problems that may not be immediately apparent through events or standard monitoring.
+		Use Kubescape and kubectl tools to scan the cluster, and then aggregate the results based on the instructions.
+		Prioritize Kubescape scan results over the kubectl tools results. Include the links for Kubescape controls that you got from the Kubescape scan results.
+
+		Provide a concise overview of the scan results, including the total number of issues found.
+		If there were no issues found for a specific check, do not include that section in the report.
+		List the Kubernetes objects directly affected by the issue.
+		Make sure that your checks are relevant to the current state of the cluster, do not include resources that no longer exist.
+
+		Summary section needs to be at the top of the report, followed by specific checks.
+		Summary outlines what are the issues and how many of them were found, and one line sentence about the overall cluster state based on the results.
+		Use emojis for the severity of the issues in the summary (critical/high/medium/low), and also for the headlines of the checks to distinguish them.
+		Use a separator "\n\n---\n\n" to split the message into TWO logical sections, no more.
+
+		Specific checks:
+
+		Pod Health:
+		Identify pods in a crash-loop backoff state with a high restart count.
+		Identify pods that have been OOMKilled (Out of Memory Killed) multiple times.
+		Look for pods stuck in a pending state for an extended period.
+		Resource Utilization:
+		Identify nodes or pods with critically high CPU or memory usage. By critically high we mean above 90%.
+		Check for critical resource starvation issues affecting multiple pods or namespaces.
+		Configuration:
+		Look for pods running with very insecure capabilities (e.g., ALL, NET_RAW, SYS_ADMIN).
+		Identify pods using deprecated or insecure container images.
+		Check for misconfigured network policies that could expose sensitive services.
+		Networking:
+		Identify pods or services experiencing significant network latency or packet loss.
+		Check for network partitions or connectivity issues between critical components.
+		Security:
+		Under this section, include Security posture from Kubescape scan.
+
+		Additional Guidance for the LLM Agent:
+
+		Prioritize issues that pose the most immediate threat to the cluster's stability, performance, or security.
+		Skip the check output if there are no issues found for a given check. Filter out informational issues.
+		Be as specific as possible in the descriptions. Do not exceed 3000 characters in your response.
+		Don't show kubescape commands.
+		At the end of the message, add "Feel free to ask me to provide additional details, or help on how to resolve found issues!", without a separator, in any form you like.`)
+)
 
 type tool func(ctx context.Context, args []byte, p *Payload) (string, error)
 
@@ -105,6 +116,7 @@ func newAssistant(cfg *Config, log logrus.FieldLogger, out chan source.Event, ku
 	tracer := otel.Tracer(serviceName)
 
 	kcRunner := NewKubectlRunner(kubeConfigPath, tracer)
+	ksRunner := NewKubescapeRunner(kubeConfigPath, tracer)
 	bkRunner, err := NewBotkubeRunner(tracer)
 	if err != nil {
 		return nil, fmt.Errorf("while creating Botkube runner: %w", err)
@@ -133,6 +145,10 @@ func newAssistant(cfg *Config, log logrus.FieldLogger, out chan source.Event, ku
 			"kubectlLogs":                         kcRunner.Logs,
 			"botkubeGetStartupAgentConfiguration": bkRunner.GetStartupAgentConfiguration,
 			"botkubeGetAgentStatus":               bkRunner.GetAgentStatus,
+			"kubescapeScanCluster":                ksRunner.ScanCluster,
+			"kubescapeScanWorkload":               ksRunner.ScanWorkload,
+			"kubescapeScanControl":                ksRunner.ScanControl,
+			"kubescapeScanImage":                  ksRunner.ScanImage,
 		},
 		vectorStoreIDForThread: cfg.VectorStoreIDForThread,
 	}, nil
@@ -226,7 +242,7 @@ func (i *assistant) handleThread(ctx context.Context, p *Payload) (err error) {
 	})
 	run, err := i.openaiClient.CreateRun(ctx, threadID, openai.RunRequest{
 		AssistantID: i.assistID,
-		Temperature: &temperature,
+		Temperature: ptr.FromType(temperature),
 	})
 	if err != nil {
 		return fmt.Errorf("while creating a thread run: %w", err)
@@ -359,8 +375,19 @@ func (i *assistant) handleStatusCompleted(ctx context.Context, run openai.Run, p
 
 		textValue := i.trimCitationsIfPresent(i.log, c.Text)
 
-		i.out <- source.Event{
-			Message: msgAIAnswer(run, p, textValue, toolCalls),
+		msgs := strings.Split(textValue, msgSplitPattern) // one outgoing message per separator-delimited part
+		isMultiMessage := len(msgs) > 1
+		for j, msg := range msgs {
+			isLastMessage := j == len(msgs)-1
+			i.out <- source.Event{
+				Message: msgAIAnswer(run, p, msg, toolCalls, isLastMessage),
+			}
+
+			if isMultiMessage {
+				// Workaround: pause after each part so that split messages arrive in order within the same thread.
+				// NOTE(review): root cause unconfirmed (possibly Pub/Sub ordering); this also delays after the last part.
+				time.Sleep(multipleMessagesDelay)
+			}
 		}
 	}
 

diff --git a/internal/source/ai-brain/config.go b/internal/source/ai-brain/config.go
@@ -66,10 +66,16 @@ func binaryDependencies() map[string]api.Dependency {
 				"darwin/amd64":  fmt.Sprintf("https://dl.k8s.io/release/%s/bin/darwin/amd64/kubectl", kubectlVersion),
 				"darwin/arm64":  fmt.Sprintf("https://dl.k8s.io/release/%s/bin/darwin/arm64/kubectl", kubectlVersion),
 				"linux/amd64":   fmt.Sprintf("https://dl.k8s.io/release/%s/bin/linux/amd64/kubectl", kubectlVersion),
-				"linux/s390x":   fmt.Sprintf("https://dl.k8s.io/release/%s/bin/linux/s390x/kubectl", kubectlVersion),
-				"linux/ppc64le": fmt.Sprintf("https://dl.k8s.io/release/%s/bin/linux/ppc64le/kubectl", kubectlVersion),
 				"linux/arm64":   fmt.Sprintf("https://dl.k8s.io/release/%s/bin/linux/arm64/kubectl", kubectlVersion),
-				"linux/386":     fmt.Sprintf("https://dl.k8s.io/release/%s/bin/linux/386/kubectl", kubectlVersion),
+			},
+		},
+		kubescapeBinaryName: { // Kubescape CLI release binaries, one download URL per OS/architecture pair
+			URLs: map[string]string{
+				"windows/amd64": fmt.Sprintf("https://github.com/kubescape/kubescape/releases/download/%s/kubescape-windows-latest.exe", kubescapeVersion), // NOTE(review): was the macOS arm64 asset; confirm the Windows asset name for the pinned release
+				"darwin/amd64":  fmt.Sprintf("https://github.com/kubescape/kubescape/releases/download/%s/kubescape-macos-latest", kubescapeVersion),
+				"darwin/arm64":  fmt.Sprintf("https://github.com/kubescape/kubescape/releases/download/%s/kubescape-arm64-macos-latest", kubescapeVersion), // native arm64 build instead of the Intel binary
+				"linux/amd64":   fmt.Sprintf("https://github.com/kubescape/kubescape/releases/download/%s/kubescape-ubuntu-latest", kubescapeVersion),
+				"linux/arm64":   fmt.Sprintf("https://github.com/kubescape/kubescape/releases/download/%s/kubescape-arm64-ubuntu-latest", kubescapeVersion),
 			},
 		},
 	}