Skip to content

Commit

Permalink
feat: add more config for app
Browse files Browse the repository at this point in the history
Signed-off-by: Abirdcfly <fp544037857@gmail.com>
  • Loading branch information
Abirdcfly committed Dec 18, 2023
1 parent 7a8279f commit 609f65a
Show file tree
Hide file tree
Showing 27 changed files with 842 additions and 176 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/example_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
run: tests/example-test.sh
- name: Upload logs if test fail
if: failure()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ${{ github.sha }}-${{ matrix.no }}.logs
path: ${{ env.LOG_DIR }}
36 changes: 34 additions & 2 deletions api/app-node/chain/v1alpha1/llmchain_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,45 @@ type Output struct {
}

// CommonChainConfig holds the tunable LLM-call parameters shared by chain kinds.
// The field comments below become CRD descriptions via controller-gen.
type CommonChainConfig struct {
// Memory configures conversation memory for the chain (see Memory for the
// token-limit vs. conversation-window options).
Memory Memory `json:"memory,omitempty"`

// Model is the model to use in an llm call, like `gpt-3.5-turbo` or `chatglm_turbo`.
// Usually this value is just empty.
Model string `json:"model,omitempty"`
// MaxTokens is the maximum number of tokens to generate in a llm call.
// +kubebuilder:validation:Minimum=10
// +kubebuilder:validation:Maximum=4096
// +kubebuilder:default=512
MaxTokens int `json:"maxTokens,omitempty"`
// Temperature is the temperature for sampling to use in a llm call, between 0 and 1.
//+kubebuilder:validation:Minimum=0
//+kubebuilder:validation:Maximum=1
Temperature float64 `json:"temperature,omitempty"`
// StopWords is a list of words to stop on to use in a llm call.
StopWords []string `json:"stopWords,omitempty"`
// TopK is the number of tokens to consider for top-k sampling in a llm call.
TopK int `json:"topK,omitempty"`
// TopP is the cumulative probability for top-p sampling in a llm call.
TopP float64 `json:"topP,omitempty"`
// Seed is a seed for deterministic sampling in a llm call.
Seed int `json:"seed,omitempty"`
// MinLength is the minimum length of the generated text in a llm call.
MinLength int `json:"minLength,omitempty"`
// MaxLength is the maximum length of the generated text in a llm call.
MaxLength int `json:"maxLength,omitempty"`
// RepetitionPenalty is the repetition penalty for sampling in a llm call.
RepetitionPenalty float64 `json:"repetitionPenalty,omitempty"`
}

// Memory configures the chain's conversation memory. Per the field docs,
// only one of MaxTokenLimit or ConversionWindowSize should be used at a time.
type Memory struct {
// MaxTokenLimit is the maximum number of tokens to keep in memory.
// Can only use MaxTokenLimit or ConversionWindowSize.
MaxTokenLimit int `json:"maxTokenLimit,omitempty"`
// ConversionWindowSize is the maximum number of conversation rounds in memory.
// Can only use MaxTokenLimit or ConversionWindowSize.
// +kubebuilder:validation:Minimum=0
// +kubebuilder:validation:Maximum=30
// +kubebuilder:default=5
ConversionWindowSize int `json:"conversionWindowSize,omitempty"`
}

// LLMChainStatus defines the observed state of LLMChain
Expand Down
9 changes: 7 additions & 2 deletions api/app-node/chain/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 20 additions & 3 deletions api/app-node/retriever/v1alpha1/knowledgebaseretriever_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ import (

// KnowledgeBaseRetrieverSpec defines the desired state of KnowledgeBaseRetriever
// Defect fixed: the diff rendering duplicated the field list (the pre-change
// three fields plus the post-change four), which is not valid Go — each field
// must be declared exactly once. Deduplicated to the post-change version.
type KnowledgeBaseRetrieverSpec struct {
	v1alpha1.CommonSpec `json:",inline"`
	// Input / Output wire this retriever into the application node graph
	// (reference types defined alongside this spec).
	Input  Input  `json:"input,omitempty"`
	Output Output `json:"output,omitempty"`
	// Shared retrieval tuning options: score threshold, max document count,
	// and the text returned when no documents match.
	CommonRetrieverConfig `json:",inline"`
}

type Input struct {
Expand All @@ -38,6 +39,22 @@ type Output struct {
node.CommonOrInPutOrOutputRef `json:",inline"`
}

// CommonRetrieverConfig holds retrieval tuning options shared by retriever kinds.
// The kubebuilder markers below generate CRD validation/defaults; do not edit casually.
type CommonRetrieverConfig struct {
// ScoreThreshold is the cosine distance float score threshold. Lower score represents more similarity.
// +kubebuilder:validation:Minimum=0
// +kubebuilder:validation:Maximum=1
// +kubebuilder:default=0.7
ScoreThreshold float32 `json:"scoreThreshold,omitempty"`
// NumDocuments is the max number of documents to return.
// +kubebuilder:default=5
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=10
NumDocuments int `json:"numDocuments,omitempty"`
// DocNullReturn is the return statement when the query result is empty from the retriever.
// The default below is a user-facing Chinese string ("content not found, please
// describe your question in detail") and is part of runtime behavior.
// +kubebuilder:default="未找到您询问的内容,请详细描述您的问题"
DocNullReturn string `json:"docNullReturn,omitempty"`
}

// KnowledgeBaseRetrieverStatus defines the observed state of KnowledgeBaseRetriever
type KnowledgeBaseRetrieverStatus struct {
// ObservedGeneration is the last observed generation.
Expand Down
16 changes: 16 additions & 0 deletions api/app-node/retriever/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion apiserver/pkg/chat/chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"errors"
"time"

"github.com/tmc/langchaingo/memory"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -73,6 +74,7 @@ func AppRun(ctx context.Context, req ChatReqBody) (*ChatRespBody, chan ChatRespB
StartedAt: time.Now(),
UpdatedAt: time.Now(),
Messages: make([]Message, 0),
History: memory.NewChatMessageHistory(),
}
}
conversion.Messages = append(conversion.Messages, Message{
Expand All @@ -85,7 +87,7 @@ func AppRun(ctx context.Context, req ChatReqBody) (*ChatRespBody, chan ChatRespB
if err != nil {
return nil, nil, err
}
out, outStream, err := appRun.Run(ctx, c, application.Input{Question: req.Query, NeedStream: req.ResponseMode == Streaming})
out, outStream, err := appRun.Run(ctx, c, application.Input{Question: req.Query, NeedStream: req.ResponseMode == Streaming, History: conversion.History})
if err != nil {
return nil, nil, err
}
Expand Down
7 changes: 6 additions & 1 deletion apiserver/pkg/chat/chat_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ limitations under the License.

package chat

import "time"
import (
"time"

"github.com/tmc/langchaingo/memory"
)

type ResponseMode string

Expand Down Expand Up @@ -48,6 +52,7 @@ type Conversion struct {
StartedAt time.Time `json:"started_at"`
UpdatedAt time.Time `json:"updated_at"`
Messages []Message `json:"messages"`
History *memory.ChatMessageHistory
}

type Message struct {
Expand Down
58 changes: 56 additions & 2 deletions config/crd/bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,40 @@ spec:
- llm
- prompt
type: object
maxLength:
description: MaxLength is the maximum length of the generated text
in a llm call.
type: integer
maxTokens:
default: 512
description: MaxTokens is the maximum number of tokens to generate
to use in a llm call.
maximum: 4096
minimum: 10
type: integer
memory:
description: for memory
properties:
conversionWindowSize:
default: 5
description: ConversionWindowSize is the maximum number of conversation
rounds in memory.Can only use MaxTokenLimit or ConversionWindowSize.
maximum: 30
minimum: 0
type: integer
maxTokenLimit:
description: MaxTokenLimit is the maximum number of tokens to
keep in memory. Can only use MaxTokenLimit or ConversionWindowSize.
type: integer
type: object
minLength:
description: MinLength is the minimum length of the generated text
in a llm call.
type: integer
model:
description: Model is the model to use in an llm call, like `gpt-3.5-turbo`
or `chatglm_turbo`. Usually this value is just empty.
type: string
output:
properties:
apiGroup:
Expand All @@ -111,6 +138,33 @@ spec:
description: Name is the name of resource being referenced
type: string
type: object
repetitionPenalty:
description: RepetitionPenalty is the repetition penalty for sampling
in a llm call.
type: number
seed:
description: Seed is a seed for deterministic sampling in a llm call.
type: integer
stopWords:
description: StopWords is a list of words to stop on to use in a llm
call.
items:
type: string
type: array
temperature:
description: Temperature is the temperature for sampling to use in
a llm call, between 0 and 1.
maximum: 1
minimum: 0
type: number
topK:
description: TopK is the number of tokens to consider for top-k sampling
in a llm call.
type: integer
topP:
description: TopP is the cumulative probability for top-p sampling
in a llm call.
type: number
required:
- input
- output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,40 @@ spec:
- prompt
- retriever
type: object
maxLength:
description: MaxLength is the maximum length of the generated text
in a llm call.
type: integer
maxTokens:
default: 512
description: MaxTokens is the maximum number of tokens to generate
to use in a llm call.
maximum: 4096
minimum: 10
type: integer
memory:
description: for memory
properties:
conversionWindowSize:
default: 5
description: ConversionWindowSize is the maximum number of conversation
rounds in memory.Can only use MaxTokenLimit or ConversionWindowSize.
maximum: 30
minimum: 0
type: integer
maxTokenLimit:
description: MaxTokenLimit is the maximum number of tokens to
keep in memory. Can only use MaxTokenLimit or ConversionWindowSize.
type: integer
type: object
minLength:
description: MinLength is the minimum length of the generated text
in a llm call.
type: integer
model:
description: Model is the model to use in an llm call, like `gpt-3.5-turbo`
or `chatglm_turbo`. Usually this value is just empty.
type: string
output:
properties:
apiGroup:
Expand All @@ -132,6 +159,33 @@ spec:
description: Name is the name of resource being referenced
type: string
type: object
repetitionPenalty:
description: RepetitionPenalty is the repetition penalty for sampling
in a llm call.
type: number
seed:
description: Seed is a seed for deterministic sampling in a llm call.
type: integer
stopWords:
description: StopWords is a list of words to stop on to use in a llm
call.
items:
type: string
type: array
temperature:
description: Temperature is the temperature for sampling to use in
a llm call, between 0 and 1.
maximum: 1
minimum: 0
type: number
topK:
description: TopK is the number of tokens to consider for top-k sampling
in a llm call.
type: integer
topP:
description: TopP is the cumulative probability for top-p sampling
in a llm call.
type: number
required:
- input
- output
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ spec:
displayName:
description: DisplayName defines datasource display name
type: string
docNullReturn:
default: 未找到您询问的内容,请详细描述您的问题
description: DocNullReturn is the return statement when the query
result is empty from the retriever.
type: string
input:
properties:
apiGroup:
Expand All @@ -66,6 +71,12 @@ spec:
- apiGroup
- kind
type: object
numDocuments:
default: 5
description: NumDocuments is the max number of documents to return.
maximum: 10
minimum: 1
type: integer
output:
properties:
apiGroup:
Expand All @@ -82,6 +93,13 @@ spec:
description: Name is the name of resource being referenced
type: string
type: object
scoreThreshold:
default: 0.7
description: ScoreThreshold is the cosine distance float score threshold.
Lower score represents more similarity.
maximum: 1
minimum: 0
type: number
type: object
status:
description: KnowledgeBaseRetrieverStatus defines the observed state of
Expand Down
Loading

0 comments on commit 609f65a

Please sign in to comment.