fix: make gemini fail less often
Signed-off-by: Abirdcfly <fp544037857@gmail.com>
Abirdcfly committed Mar 8, 2024
1 parent 184c4dc commit 6612969
Showing 5 changed files with 126 additions and 66 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/example_test.yaml
@@ -65,7 +65,7 @@ jobs:
- build-image
runs-on: ubuntu-latest
strategy:
fail-fast: true
fail-fast: false
matrix:
no: [1, 2, 3]
steps:
@@ -97,6 +97,8 @@ jobs:
--create-dirs -o /usr/local/bin/mc && chmod +x /usr/local/bin/mc )
- name: Example test
run: tests/example-test.sh
env:
GITHUB_ACTION_NO: ${{ matrix.no }}
- name: Upload logs if test fail
if: failure()
uses: actions/upload-artifact@v4
2 changes: 1 addition & 1 deletion controllers/base/llm_controller.go
@@ -272,5 +272,5 @@ func (r *LLMReconciler) UpdateStatus(ctx context.Context, instance *arcadiav1alp
newCondition = instance.ReadyCondition(msg)
}
instanceCopy.Status.SetConditions(newCondition)
return r.Client.Status().Update(ctx, instanceCopy)
return errors.Join(err, r.Client.Status().Update(ctx, instanceCopy))
}
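The controller change above stops discarding the reconcile error: errors.Join (standard library, Go 1.20+) returns both the original error and any status-update failure, and drops nil values. A minimal sketch of that behavior, assuming nothing beyond the standard library:

```go
package main

import (
	"errors"
	"fmt"
)

func main() {
	reconcileErr := errors.New("reconcile failed")
	var statusErr error // nil: the status update succeeded

	// errors.Join ignores nil arguments, so a successful status update
	// neither masks nor adds noise to the original reconcile error.
	err := errors.Join(reconcileErr, statusErr)

	fmt.Println(err)                          // reconcile failed
	fmt.Println(errors.Is(err, reconcileErr)) // true
}
```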
6 changes: 6 additions & 0 deletions pkg/appruntime/app_runtime.go
@@ -21,6 +21,7 @@ import (
"context"
"errors"
"fmt"
"runtime/debug"
"strings"

langchaingoschema "github.com/tmc/langchaingo/schema"
@@ -169,6 +170,11 @@ func (a *Application) Run(ctx context.Context, cli client.Client, respStream cha
continue
}
klog.FromContext(ctx).V(3).Info(fmt.Sprintf("try to run node:%s", e.Name()))
defer func() {
if r := recover(); r != nil {
klog.FromContext(ctx).Info(fmt.Sprintf("Recovered from node:%s error:%s stack:%s", e.Name(), r, string(debug.Stack())))
}
}()
if out, err = e.Run(ctx, cli, out); err != nil {
return Output{}, fmt.Errorf("run node %s: %w", e.Name(), err)
}
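The Run loop above now wraps node execution with a deferred recover, so a panicking node is logged together with its stack trace instead of crashing the whole application runtime. A minimal sketch of the recover-plus-debug.Stack pattern; runNode is a hypothetical stand-in for a node's Run method, not code from the commit:

```go
package main

import (
	"fmt"
	"runtime/debug"
)

// runNode is a hypothetical stand-in for a node that may panic at runtime.
func runNode() {
	panic("node blew up")
}

func safeRun() {
	defer func() {
		if r := recover(); r != nil {
			// debug.Stack captures the current goroutine's stack at the point of recovery.
			fmt.Printf("Recovered from node error:%v stack:%s\n", r, debug.Stack())
		}
	}()
	runNode()
}

func main() {
	safeRun()
	fmt.Println("caller keeps running")
}
```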
89 changes: 55 additions & 34 deletions pkg/appruntime/log/log.go
@@ -36,125 +36,146 @@ var _ callbacks.Handler = KLogHandler{}

func (l KLogHandler) HandleLLMGenerateContentStart(ctx context.Context, ms []llms.MessageContent) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Entering LLM with messages:")
buf := strings.Builder{}
buf.WriteString("Entering LLM with messages: ")
for _, m := range ms {
// TODO: Implement logging of other content types
var buf strings.Builder
buf.WriteString("text: ")
for _, t := range m.Parts {
if t, ok := t.(llms.TextContent); ok {
buf.WriteString(t.Text)
}
}
logger.V(l.LogLevel).Info("Role:", m.Role)
logger.V(l.LogLevel).Info("Text:", buf.String())
buf.WriteString("Role: ")
buf.WriteString(string(m.Role))
}
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info(buf.String())
}

func (l KLogHandler) HandleLLMGenerateContentEnd(ctx context.Context, res *llms.ContentResponse) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting LLM with response:")
buf := strings.Builder{}
buf.WriteString("Exiting LLM with response: ")
for _, c := range res.Choices {
if c.Content != "" {
logger.V(l.LogLevel).Info("Content:", c.Content)
buf.WriteString("Content: " + c.Content)
}
if c.StopReason != "" {
logger.V(l.LogLevel).Info("StopReason:", c.StopReason)
buf.WriteString("StopReason: " + c.StopReason)
}
if len(c.GenerationInfo) > 0 {
logger.V(l.LogLevel).Info("GenerationInfo:")
buf.WriteString("GenerationInfo: ")
for k, v := range c.GenerationInfo {
fmt.Printf("%20s: %v\n", k, v)
buf.WriteString(fmt.Sprintf("%20s: %#v\n", k, v))
}
}
if c.FuncCall != nil {
logger.V(l.LogLevel).Info("FuncCall: ", c.FuncCall.Name, c.FuncCall.Arguments)
buf.WriteString("FuncCall: " + c.FuncCall.Name + " " + c.FuncCall.Arguments)
}
}
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info(buf.String())
}

func (l KLogHandler) HandleStreamingFunc(ctx context.Context, chunk []byte) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info(string(chunk))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("log streaming: " + string(chunk))
}

func (l KLogHandler) HandleText(ctx context.Context, text string) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info(text)
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("log text: " + text)
}

func (l KLogHandler) HandleLLMStart(ctx context.Context, prompts []string) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Entering LLM with prompts:", prompts)
buf := strings.Builder{}
buf.WriteString("Entering LLM with prompts: ")
for _, p := range prompts {
buf.WriteString(p)
buf.WriteString(" ")
}
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info(buf.String())
}

func (l KLogHandler) HandleLLMError(ctx context.Context, err error) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting LLM with error:", err)
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Error(err, "Exiting LLM with error")
}

func (l KLogHandler) HandleChainStart(ctx context.Context, inputs map[string]any) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Entering chain with inputs:", formatChainValues(inputs))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info(fmt.Sprintf("Entering chain with inputs: %#v", inputs))
}

func (l KLogHandler) HandleChainEnd(ctx context.Context, outputs map[string]any) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting chain with outputs:", formatChainValues(outputs))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info(fmt.Sprintf("Exiting chain with outputs: %#v", outputs))
}

func (l KLogHandler) HandleChainError(ctx context.Context, err error) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting chain with error:", err)
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Error(err, "Exiting chain with error")
}

func (l KLogHandler) HandleToolStart(ctx context.Context, input string) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Entering tool with input:", removeNewLines(input))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("Entering tool with input: " + removeNewLines(input))
}

func (l KLogHandler) HandleToolEnd(ctx context.Context, output string) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting tool with output:", removeNewLines(output))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("Exiting tool with output: " + removeNewLines(output))
}

func (l KLogHandler) HandleToolError(ctx context.Context, err error) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Exiting tool with error:", err)
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Error(err, "Exiting tool with error")
}

func (l KLogHandler) HandleAgentAction(ctx context.Context, action schema.AgentAction) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Agent selected action:", formatAgentAction(action))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("Agent selected action: " + formatAgentAction(action))
}

func (l KLogHandler) HandleAgentFinish(ctx context.Context, finish schema.AgentFinish) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info(fmt.Sprintf("Agent finish: %v", finish))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("Agent finish: " + formatAgentFinish(finish))
}

func (l KLogHandler) HandleRetrieverStart(ctx context.Context, query string) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info("Entering retriever with query:", removeNewLines(query))
logger.WithValues("logger", "arcadia")
logger.V(l.LogLevel).Info("Entering retriever with query: " + removeNewLines(query))
}

func (l KLogHandler) HandleRetrieverEnd(ctx context.Context, query string, documents []schema.Document) {
logger := klog.FromContext(ctx)
logger.V(l.LogLevel).Info(fmt.Sprintf("Exiting retriever with documents for query:%s documents: %v", query, documents))
}

func formatChainValues(values map[string]any) string {
output := ""
for key, value := range values {
output += fmt.Sprintf("\"%s\" : \"%s\", ", removeNewLines(key), removeNewLines(value))
}

return output
logger.WithValues("logger", "arcadia")
// TODO need format
// logger.V(l.LogLevel).Info(fmt.Sprintf("Exiting retriever with documents for query:%s documents: %#v", query, documents))
logger.V(l.LogLevel).Info(fmt.Sprintf("Exiting retriever with documents for query: %s", query))
}

func formatAgentAction(action schema.AgentAction) string {
return fmt.Sprintf("\"%s\" with input \"%s\"", removeNewLines(action.Tool), removeNewLines(action.ToolInput))
}

func formatAgentFinish(finish schema.AgentFinish) string {
return fmt.Sprintf("ReturnValues: %#v Log: %s", removeNewLines(finish.ReturnValues), removeNewLines(finish.Log))
}
func removeNewLines(s any) string {
return strings.ReplaceAll(fmt.Sprint(s), "\n", " ")
}
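The log.go rewrite above replaces the scattered per-field Info calls with a single message accumulated in a strings.Builder, and routes real failures through logger.Error so the error object travels with the log entry. A minimal sketch of the accumulation pattern; choice and formatChoices are hypothetical simplifications, not part of the commit:

```go
package main

import (
	"fmt"
	"strings"
)

// choice is a simplified stand-in for llms.ContentChoice.
type choice struct {
	Content    string
	StopReason string
}

// formatChoices builds one log message for the whole response instead of
// emitting a separate (and possibly interleaved) log line per field.
func formatChoices(choices []choice) string {
	buf := strings.Builder{}
	buf.WriteString("Exiting LLM with response: ")
	for _, c := range choices {
		if c.Content != "" {
			buf.WriteString("Content: " + c.Content + " ")
		}
		if c.StopReason != "" {
			buf.WriteString("StopReason: " + c.StopReason + " ")
		}
	}
	return buf.String()
}

func main() {
	fmt.Println(formatChoices([]choice{{Content: "hello", StopReason: "stop"}}))
}
```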
91 changes: 61 additions & 30 deletions tests/example-test.sh
@@ -184,37 +184,39 @@ function getRespInAppChat() {
query=$3
conversationID=$4
testStream=$5
START_TIME=$(date +%s)
RETRY_COUNT=3
attempt=0
while true; do
info "sleep 3 seconds"
sleep 3
data=$(jq -n --arg appname "$appname" --arg query "$query" --arg namespace "$namespace" --arg conversationID "$conversationID" '{"query":$query,"response_mode":"blocking","conversation_id":$conversationID,"app_name":$appname, "app_namespace":$namespace}')
resp=$(curl -s -XPOST http://127.0.0.1:8081/chat --data "$data")
ai_data=$(echo $resp | jq -r '.message')
references=$(echo $resp | jq -r '.references')
if [ -z "$ai_data" ] || [ "$ai_data" = "null" ]; then
echo $resp
exit 1
attempt=$((attempt + 1))
if [ $attempt -gt $RETRY_COUNT ]; then
echo "❌: Failed. Retry count exceeded."
exit 1
fi
echo "🔄: Failed. Attempt $attempt/$RETRY_COUNT"
continue
fi
echo "👤: ${query}"
echo "🤖: ${ai_data}"
echo "🔗: ${references}"
resp_conversation_id=$(echo $resp | jq -r '.conversation_id')

if [ $testStream == "true" ]; then
info "sleep 3 seconds"
sleep 3
info "just test stream mode"
data=$(jq -n --arg appname "$appname" --arg query "$query" --arg namespace "$namespace" --arg conversationID "$conversationID" '{"query":$query,"response_mode":"streaming","conversation_id":$conversationID,"app_name":$appname, "app_namespace":$namespace}')
curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat --data "$data"
fi
break
CURRENT_TIME=$(date +%s)
ELAPSED_TIME=$((CURRENT_TIME - START_TIME))
if [ $ELAPSED_TIME -gt $TimeoutSeconds ]; then
error "Timeout reached"
exit 1
fi
sleep 5
done
resp_conversation_id=$(echo $resp | jq -r '.conversation_id')

if [ $testStream == "true" ]; then
info "sleep 3 seconds"
sleep 3
info "just test stream mode"
data=$(jq -n --arg appname "$appname" --arg query "$query" --arg namespace "$namespace" --arg conversationID "$conversationID" '{"query":$query,"response_mode":"streaming","conversation_id":$conversationID,"app_name":$appname, "app_namespace":$namespace}')
curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat --data "$data"
fi
}
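The test helper above now retries a failed chat request a few times and also bails out once an overall deadline has passed, rather than exiting on the first empty reply. The same bounded-retry-with-deadline pattern, sketched in Go for illustration (chatOnce is a hypothetical stand-in for one blocking /chat request):

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// chatOnce is a hypothetical stand-in for one blocking /chat request.
func chatOnce(attempt int) (string, error) {
	if attempt < 2 {
		return "", errors.New("empty message from model")
	}
	return "some answer", nil
}

// askWithRetry mirrors the shell loop: retry up to maxRetries times,
// but give up early once the overall deadline has passed.
func askWithRetry(maxRetries int, timeout time.Duration) (string, error) {
	deadline := time.Now().Add(timeout)
	for attempt := 1; ; attempt++ {
		reply, err := chatOnce(attempt)
		if err == nil {
			return reply, nil
		}
		if attempt >= maxRetries {
			return "", fmt.Errorf("retry count exceeded: %w", err)
		}
		if time.Now().After(deadline) {
			return "", fmt.Errorf("timeout reached: %w", err)
		}
		time.Sleep(3 * time.Second)
	}
}

func main() {
	reply, err := askWithRetry(3, 30*time.Second)
	fmt.Println(reply, err)
}
```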

info "1. create kind cluster"
@@ -362,6 +364,21 @@ if [[ $GITHUB_ACTIONS == "true" ]]; then
info "in github action, use gemini"
sed -i 's/model: chatglm_turbo/model: gemini-pro/g' config/samples/*
sed -i 's/model: glm-4/model: gemini-pro/g' config/samples/*
case "$GITHUB_ACTION_NO" in
1)
info "in github action no 1, use gemini apikey github-action-1"
sed -i 's/apiKey: "QUl6YVN5QVZOdGRYOHpkeU5pNWpubzNYSExUWGM0UnpJSGxIRUFz"/apiKey: "QUl6YVN5QTBBWGVNOEJoRGpoSDN3MjBYdHc3NEQ3QUpVaV9meFRr"/g' config/samples/app_shared_llm_service_gemini.yaml
;;
2)
info "in github action no 2, use gemini apikey github-action-2"
sed -i 's/apiKey: "QUl6YVN5QVZOdGRYOHpkeU5pNWpubzNYSExUWGM0UnpJSGxIRUFz"/apiKey: "QUl6YVN5QlZPeXpQUlc0aE5tQ244QkV1MmxBcEYyeWo2eVVfcU93"/g' config/samples/app_shared_llm_service_gemini.yaml
;;
3)
info "in github action no 3, use gemini apikey github-action-3"
sed -i 's/apiKey: "QUl6YVN5QVZOdGRYOHpkeU5pNWpubzNYSExUWGM0UnpJSGxIRUFz"/apiKey: "QUl6YVN5RHJlSmtPZXZXZHZ5NGRUU1lrbGFFTFVzN0tQQktUZXdZ"/g' config/samples/app_shared_llm_service_gemini.yaml
;;
*) ;;
esac
kubectl apply -f config/samples/app_shared_llm_service_gemini.yaml
else
info "in local, use zhipu"
@@ -441,18 +458,32 @@ if [[ $resp == *"$delete_conversation_id"* ]]; then
exit 1
fi
info "8.4.5 get app prompt starters"
resp=$(curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat/prompt-starter --data '{"app_name": "base-chat-with-bot", "app_namespace": "arcadia"}')
echo $resp | jq .
if [[ $resp == *"error"* ]]; then
echo "failed"
exit 1
fi
resp=$(curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat/prompt-starter --data '{"app_name": "base-chat-with-knowledgebase-pgvector", "app_namespace": "arcadia"}')
echo $resp | jq .
if [[ $resp == *"error"* ]]; then
echo "failed"
exit 1
fi
RETRY_COUNT=3
attempt=0
while true; do
info "sleep 3 seconds"
sleep 3
info "get app prompt starters without knowledgebase"
resp=$(curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat/prompt-starter --data '{"app_name": "base-chat-with-bot", "app_namespace": "arcadia"}')
echo $resp | jq .
if [[ $resp == *"error"* ]]; then
attempt=$((attempt + 1))
if [ $attempt -gt $RETRY_COUNT ]; then
echo "❌: Failed. Retry count exceeded."
exit 1
fi
echo "🔄: Failed. Attempt $attempt/$RETRY_COUNT"
continue
fi
info "get app prompt starters with knowledgebase"
resp=$(curl --max-time $TimeoutSeconds -s -XPOST http://127.0.0.1:8081/chat/prompt-starter --data '{"app_name": "base-chat-with-knowledgebase-pgvector", "app_namespace": "arcadia"}')
echo $resp | jq .
if [[ $resp == *"error"* ]]; then
echo "failed"
exit 1
fi
break
done

# There is uncertainty in the AI replies, most of the time, it will pass the test, a small percentage of the time, the AI will call names in each reply, causing the test to fail, therefore, temporarily disable the following tests
#getRespInAppChat "base-chat-with-bot" "arcadia" "What is your model?" ${resp_conversation_id} "false"
