
Commit

parent 3f70da4
author penghou.ho <penghou.ho@techronex.com> 1701341533 +0800
committer penghou.ho <penghou.ho@techronex.com> 1707199703 +0800

Add requirements.txt

Create only necessary tables

Remove reference info in chat completion result
Set disable_alembic_upgrade to True
Comment out _initialize_awel
Comment out mount_static_files
Fix deprecated torch.has_mps
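
(As context for the torch.has_mps fix above: a minimal sketch of the likely replacement, assuming the change swaps the deprecated attribute for the torch.backends.mps API; the helper name is illustrative, not taken from the repository.)

```python
import torch

def _mps_available() -> bool:
    """Illustrative replacement for the deprecated `torch.has_mps` check."""
    # is_built() reports whether PyTorch was compiled with MPS support;
    # is_available() additionally checks the running macOS can actually use it.
    return torch.backends.mps.is_built() and torch.backends.mps.is_available()

device = "mps" if _mps_available() else "cpu"
```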

Add API key

Comment out unused API endpoints

Install rocksdict to enable DiskCacheStorage

Fix chat_knowledge missing from chat_mode

Update requirements.txt

Re-enable AWEL and add API key check for the simple_rag_example DAG

Merge main bdf9442

Set disable_alembic_upgrade to False

Compile bitsandbytes from source and enable verbose

Tune the chat knowledge prompt to refer only to the given context

Add the web static files and uncomment previously unused APIs

Add back routers

Enable KNOWLEDGE_CHAT_SHOW_RELATIONS

Display relations based on CFG.KNOWLEDGE_CHAT_SHOW_RELATIONS

Stop appending the reference to last_output if KNOWLEDGE_CHAT_SHOW_RELATIONS is false

Fix the issue where no reference was ever returned

Improve Chinese prompts

Update requirements.txt

Improve prompt

Improve prompt

Fix prompt variable name

Use openhermes-2.5-mistral-7b.Q4_K_M.gguf

1. Fix the deletion issue in LlamaCppModel
2. Disable verbose log
3. Update diskcache
4. Remove conda-pack

Update Chinese prompt and process the model response

Extract result from varying tags

Add back missing content_matches and put the tags regex into a variable

Update English prompt and decide CANNOT_ANSWER based on the language configuration

Add 3 new model entries and upgrade bitsandbytes

Add a few chat templates

Update model conversation with fastchat code

Revert "Update model conversation with fastchat code"

This reverts commit a5dc4b5.

Revert "Add few chat templates"

This reverts commit e6b6c99.

Add OpenHermes-2.5-Mistral-7B chat template

Fix missing messages and offset in chat template

Update fschat

Remove model adapter debugging logs and add conversation template

Update Chinese chat knowledge prompt

Avoid saving long chat history messages

Update Chinese chat knowledge prompt

Temporary workaround to make the GGUF file use a different chat template

Use ADD_COLON_SINGLE instead of FALCON_CHAT for separator style

Allow chat completion requests without model_name

Use starling-lm-7b-alpha.Q5_K_M.gguf

Add an empty string as the system message for the openchat_3.5 chat template

Undo response regex in generate_streaming

refactor: Refactor storage and new serve template (eosphoros-ai#947)

feat(core): Add API authentication for serve template (eosphoros-ai#950)

ci: Add python unit test workflows (eosphoros-ai#954)

feat(model): Support Mixtral-8x7B (eosphoros-ai#959)

feat(core): Support multi round conversation operator (eosphoros-ai#986)

chore(build): Fix typo and new pre-commit config (eosphoros-ai#987)

feat(model): Support SOLAR-10.7B-Instruct-v1.0 (eosphoros-ai#1001)

refactor: RAG Refactor (eosphoros-ai#985)

Co-authored-by: Aralhi <xiaoping0501@gmail.com>
Co-authored-by: csunny <cfqsunny@163.com>

Upgrade English prompt for chat knowledge
penghou.ho committed Feb 6, 2024
1 parent 3f70da4 commit b130d7c
Showing 68 changed files with 1,143 additions and 282 deletions.
28 changes: 18 additions & 10 deletions .env.template
@@ -35,8 +35,8 @@ LLM_MODEL=vicuna-13b-v1.5
MODEL_SERVER=http://127.0.0.1:8000
LIMIT_MODEL_CONCURRENCY=5
MAX_POSITION_EMBEDDINGS=4096
QUANTIZE_QLORA=True
QUANTIZE_8bit=True
QUANTIZE_QLORA=False # True
QUANTIZE_8bit=False # True
# QUANTIZE_4bit=False
## SMART_LLM_MODEL - Smart language model (Default: vicuna-13b)
## FAST_LLM_MODEL - Fast language model (Default: chatglm-6b)
@@ -70,6 +70,8 @@ QUANTIZE_8bit=True
#** EMBEDDING SETTINGS **#
#*******************************************************************#
EMBEDDING_MODEL=text2vec
# EMBEDDING_MODEL=text2vec-base

#EMBEDDING_MODEL=m3e-large
#EMBEDDING_MODEL=bge-large-en
#EMBEDDING_MODEL=bge-large-zh
@@ -97,15 +99,16 @@ KNOWLEDGE_SEARCH_REWRITE=False
#** DB-GPT METADATA DATABASE SETTINGS **#
#*******************************************************************#
### SQLite database (Current default database)
LOCAL_DB_TYPE=sqlite
# LOCAL_DB_PATH=data/default_sqlite.db
# LOCAL_DB_TYPE=sqlite

### MYSQL database
# LOCAL_DB_TYPE=mysql
# LOCAL_DB_USER=root
# LOCAL_DB_PASSWORD={your_password}
# LOCAL_DB_HOST=127.0.0.1
# LOCAL_DB_PORT=3306
# LOCAL_DB_NAME=dbgpt
LOCAL_DB_TYPE=mysql
LOCAL_DB_USER=root
LOCAL_DB_PASSWORD=techronex
LOCAL_DB_HOST=127.0.0.1
LOCAL_DB_PORT=3306
LOCAL_DB_NAME=dbgpt
### This option determines where conversation records are stored. If it is not configured, the old DuckDB storage is used by default. It can optionally be db or file (if the value is db, the database configured by LOCAL_DB will be used)
#CHAT_HISTORY_STORE_TYPE=db

@@ -219,4 +222,9 @@ SUMMARY_CONFIG=FAST
# FATAL, ERROR, WARNING, INFO, DEBUG, NOTSET
DBGPT_LOG_LEVEL=INFO
# LOG dir, default: ./logs
#DBGPT_LOG_DIR=
#DBGPT_LOG_DIR=

#*******************************************************************#
#** API KEY **#
#*******************************************************************#
FAST_API_KEY=FAST_API_KEY
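
(The diffs below import a `_check_api_key` dependency from `dbgpt.util.api_utils` and read this key via `CFG.fast_api_key`. A minimal sketch of how such a dependency could work, assuming a standard `Authorization: Bearer <key>` header; the actual helper in the repository may differ.)

```python
import os

from fastapi import HTTPException, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

_bearer = HTTPBearer(auto_error=False)

def _check_api_key(
    auth: HTTPAuthorizationCredentials = Security(_bearer),
) -> None:
    """Reject requests that do not carry the configured FAST_API_KEY."""
    expected = os.getenv("FAST_API_KEY")
    if not expected:
        return  # no key configured, the endpoint stays open
    if auth is None or auth.credentials != expected:
        raise HTTPException(status_code=401, detail="Invalid or missing API key")
```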
53 changes: 53 additions & 0 deletions assets/schema/modified_knowledge_management.sql
@@ -0,0 +1,53 @@
-- Create only necessary tables
CREATE DATABASE IF NOT EXISTS dbvicuna;
use dbvicuna;

-- For alembic migration tool
CREATE TABLE `alembic_version` (
version_num VARCHAR(32) NOT NULL,
CONSTRAINT alembic_version_pkc PRIMARY KEY (version_num)
);

CREATE TABLE `knowledge_space` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`name` varchar(100) NOT NULL COMMENT 'knowledge space name',
`vector_type` varchar(50) NOT NULL COMMENT 'vector type',
`desc` varchar(500) NOT NULL COMMENT 'description',
`owner` varchar(100) DEFAULT NULL COMMENT 'owner',
`context` TEXT DEFAULT NULL COMMENT 'context argument',
`gmt_created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'created time',
`gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_name` (`name`) COMMENT 'index:idx_name'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge space table';

CREATE TABLE `knowledge_document` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`doc_name` varchar(100) NOT NULL COMMENT 'document path name',
`doc_type` varchar(50) NOT NULL COMMENT 'doc type',
`space` varchar(50) NOT NULL COMMENT 'knowledge space',
`chunk_size` int NOT NULL COMMENT 'chunk size',
`last_sync` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'last sync time',
`status` varchar(50) NOT NULL COMMENT 'status TODO,RUNNING,FAILED,FINISHED',
`content` LONGTEXT NOT NULL COMMENT 'knowledge embedding sync result',
`result` TEXT NULL COMMENT 'knowledge content',
`vector_ids` LONGTEXT NULL COMMENT 'vector_ids',
`summary` LONGTEXT NULL COMMENT 'knowledge summary',
`gmt_created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'created time',
`gmt_modified` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_doc_name` (`doc_name`) COMMENT 'index:idx_doc_name'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document table';

CREATE TABLE `document_chunk` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`doc_name` varchar(100) NOT NULL COMMENT 'document path name',
`doc_type` varchar(50) NOT NULL COMMENT 'doc type',
`document_id` int NOT NULL COMMENT 'document parent id',
`content` longtext NOT NULL COMMENT 'chunk content',
`meta_info` varchar(200) NOT NULL COMMENT 'metadata info',
`gmt_created` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'created time',
`gmt_modified` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_document_id` (`document_id`) COMMENT 'index:document_id'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document chunk detail';
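
(Since disable_alembic_upgrade is set to True later in this commit, this schema has to be applied by hand. A rough sketch, assuming pymysql is installed and the credentials match the .env above; it naively splits on `;`, which is enough for a statement-per-semicolon file like this one.)

```python
import pymysql

# Placeholder credentials: replace with the values from your .env.
conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="<your_password>")
try:
    with conn.cursor() as cur:
        with open("assets/schema/modified_knowledge_management.sql") as f:
            # Keep only non-empty chunks after the naive split on ';'.
            statements = [s.strip() for s in f.read().split(";") if s.strip()]
        for stmt in statements:
            cur.execute(stmt)
    conn.commit()
finally:
    conn.close()
```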
3 changes: 3 additions & 0 deletions dbgpt/_private/config.py
@@ -47,6 +47,9 @@ def __init__(self) -> None:
# This is a proxy server, just for test_py. we will remove this later.
self.proxy_api_key = os.getenv("PROXY_API_KEY")
self.bard_proxy_api_key = os.getenv("BARD_PROXY_API_KEY")

# NOTE: api key used for api_v1 and pilot servers
self.fast_api_key = os.getenv("FAST_API_KEY")

# In order to be compatible with the new and old model parameter design
if self.bard_proxy_api_key:
1 change: 0 additions & 1 deletion dbgpt/agent/agents/agent.py
@@ -154,7 +154,6 @@ async def a_verify_reply(
def reset(self) -> None:
"""(Abstract method) Reset the agent."""


@dataclasses.dataclass
class AgentResource:
type: str
9 changes: 1 addition & 8 deletions dbgpt/agent/agents/expand/plugin_assistant_agent.py
@@ -14,14 +14,6 @@
from ..agent import Agent, AgentContext
from ..base_agent import ConversableAgent

try:
from termcolor import colored
except ImportError:

def colored(x, *args, **kwargs):
return x


logger = logging.getLogger(__name__)


@@ -98,6 +90,7 @@ def __init__(
)

async def a_system_fill_param(self):
# TODO no db_connect attribute
params = {
"tool_list": self.plugin_generator.generate_commands_string(),
}
3 changes: 2 additions & 1 deletion dbgpt/app/base.py
@@ -297,7 +297,8 @@ class WebServerParameters(BaseParameters):
},
)
disable_alembic_upgrade: Optional[bool] = field(
default=False,
# NOTE: if False, it will generate all tables that were defined
default=True,
metadata={
"help": "Whether to disable alembic to initialize and upgrade database metadata",
},
4 changes: 3 additions & 1 deletion dbgpt/app/chat_adapter.py
@@ -255,7 +255,9 @@ def match(self, model_path: str):
return is_match

def get_conv_template(self, model_path: str) -> Conversation:
return get_conv_template("llama-2")
# return get_conv_template("llama-2")
# NOTE: temporary workaround to make the GGUF file use a different chat template
return get_conv_template("openchat_3.5")

def get_generate_stream_func(self, model_path: str):
from dbgpt.model.llm_out.llama_cpp_llm import generate_stream
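
(A quick way to inspect what the selected template renders, assuming the installed fschat version ships an `openchat_3.5` conversation template, as the later "Update fschat" commit suggests.)

```python
from fastchat.conversation import get_conv_template

conv = get_conv_template("openchat_3.5")
conv.append_message(conv.roles[0], "What is DB-GPT?")
conv.append_message(conv.roles[1], None)  # leave the assistant turn open
print(conv.get_prompt())  # the rendered prompt sent to the GGUF model
```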
2 changes: 1 addition & 1 deletion dbgpt/app/component_configs.py
@@ -20,9 +20,9 @@ def initialize_components(
embedding_model_path: str,
):
# Lazy import to avoid high time cost
from dbgpt.model.cluster.controller.controller import controller
from dbgpt.app.initialization.embedding_component import _initialize_embedding_model
from dbgpt.app.initialization.serve_initialization import register_serve_apps
from dbgpt.model.cluster.controller.controller import controller

# Register global default executor factory first
system_app.register(
35 changes: 18 additions & 17 deletions dbgpt/app/knowledge/api.py
@@ -4,7 +4,7 @@
import tempfile
from typing import List

from fastapi import APIRouter, File, Form, UploadFile
from fastapi import APIRouter, Depends, File, UploadFile, Form

from dbgpt._private.config import Config
from dbgpt.app.knowledge.request.request import (
@@ -34,6 +34,7 @@
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.util.tracer import SpanType, root_tracer
from dbgpt.util.api_utils import _check_api_key

logger = logging.getLogger(__name__)

@@ -44,7 +45,7 @@
knowledge_space_service = KnowledgeService()


@router.post("/knowledge/space/add")
@router.post("/knowledge/space/add", dependencies=[Depends(_check_api_key)])
def space_add(request: KnowledgeSpaceRequest):
print(f"/space/add params: {request}")
try:
@@ -54,7 +55,7 @@ def space_add(request: KnowledgeSpaceRequest):
return Result.failed(code="E000X", msg=f"space add error {e}")


@router.post("/knowledge/space/list")
@router.post("/knowledge/space/list", dependencies=[Depends(_check_api_key)])
def space_list(request: KnowledgeSpaceRequest):
print(f"/space/list params:")
try:
@@ -63,7 +64,7 @@ def space_list(request: KnowledgeSpaceRequest):
return Result.failed(code="E000X", msg=f"space list error {e}")


@router.post("/knowledge/space/delete")
@router.post("/knowledge/space/delete", dependencies=[Depends(_check_api_key)])
def space_delete(request: KnowledgeSpaceRequest):
print(f"/space/delete params:")
try:
@@ -72,7 +73,7 @@ def space_delete(request: KnowledgeSpaceRequest):
return Result.failed(code="E000X", msg=f"space list error {e}")


@router.post("/knowledge/{space_name}/arguments")
@router.post("/knowledge/{space_name}/arguments", dependencies=[Depends(_check_api_key)])
def arguments(space_name: str):
print(f"/knowledge/space/arguments params:")
try:
@@ -81,7 +82,7 @@ def arguments(space_name: str):
return Result.failed(code="E000X", msg=f"space list error {e}")


@router.post("/knowledge/{space_name}/argument/save")
@router.post("/knowledge/{space_name}/argument/save", dependencies=[Depends(_check_api_key)])
def arguments_save(space_name: str, argument_request: SpaceArgumentRequest):
print(f"/knowledge/space/argument/save params:")
try:
@@ -92,7 +93,7 @@ def arguments_save(space_name: str, argument_request: SpaceArgumentRequest):
return Result.failed(code="E000X", msg=f"space list error {e}")


@router.post("/knowledge/{space_name}/document/add")
@router.post("/knowledge/{space_name}/document/add", dependencies=[Depends(_check_api_key)])
def document_add(space_name: str, request: KnowledgeDocumentRequest):
print(f"/document/add params: {space_name}, {request}")
try:
@@ -106,7 +107,7 @@ def document_add(space_name: str, request: KnowledgeDocumentRequest):
return Result.failed(code="E000X", msg=f"document add error {e}")


@router.get("/knowledge/document/chunkstrategies")
@router.get("/knowledge/document/chunkstrategies", dependencies=[Depends(_check_api_key)])
def chunk_strategies():
"""Get chunk strategies"""
print(f"/document/chunkstrategies:")
@@ -139,7 +140,7 @@ def chunk_strategies():
return Result.failed(code="E000X", msg=f"chunk strategies error {e}")


@router.post("/knowledge/{space_name}/document/list")
@router.post("/knowledge/{space_name}/document/list", dependencies=[Depends(_check_api_key)])
def document_list(space_name: str, query_request: DocumentQueryRequest):
print(f"/document/list params: {space_name}, {query_request}")
try:
@@ -150,7 +151,7 @@ def document_list(space_name: str, query_request: DocumentQueryRequest):
return Result.failed(code="E000X", msg=f"document list error {e}")


@router.post("/knowledge/{space_name}/document/delete")
@router.post("/knowledge/{space_name}/document/delete", dependencies=[Depends(_check_api_key)])
def document_delete(space_name: str, query_request: DocumentQueryRequest):
print(f"/document/list params: {space_name}, {query_request}")
try:
@@ -161,7 +162,7 @@ def document_delete(space_name: str, query_request: DocumentQueryRequest):
return Result.failed(code="E000X", msg=f"document list error {e}")


@router.post("/knowledge/{space_name}/document/upload")
@router.post("/knowledge/{space_name}/document/upload", dependencies=[Depends(_check_api_key)])
async def document_upload(
space_name: str,
doc_name: str = Form(...),
@@ -213,7 +214,7 @@ async def document_upload(
return Result.failed(code="E000X", msg=f"document add error {e}")


@router.post("/knowledge/{space_name}/document/sync")
@router.post("/knowledge/{space_name}/document/sync", dependencies=[Depends(_check_api_key)])
def document_sync(space_name: str, request: DocumentSyncRequest):
logger.info(f"Received params: {space_name}, {request}")
try:
@@ -225,7 +226,7 @@ def document_sync(space_name: str, request: DocumentSyncRequest):
return Result.failed(code="E000X", msg=f"document sync error {e}")


@router.post("/knowledge/{space_name}/document/sync_batch")
@router.post("/knowledge/{space_name}/document/sync_batch", dependencies=[Depends(_check_api_key)])
def batch_document_sync(space_name: str, request: List[KnowledgeSyncRequest]):
logger.info(f"Received params: {space_name}, {request}")
try:
@@ -237,7 +238,7 @@ def batch_document_sync(space_name: str, request: List[KnowledgeSyncRequest]):
return Result.failed(code="E000X", msg=f"document sync error {e}")


@router.post("/knowledge/{space_name}/chunk/list")
@router.post("/knowledge/{space_name}/chunk/list", dependencies=[Depends(_check_api_key)])
def document_list(space_name: str, query_request: ChunkQueryRequest):
print(f"/document/list params: {space_name}, {query_request}")
try:
@@ -246,7 +247,7 @@ def document_list(space_name: str, query_request: ChunkQueryRequest):
return Result.failed(code="E000X", msg=f"document chunk list error {e}")


@router.post("/knowledge/{vector_name}/query")
@router.post("/knowledge/{vector_name}/query", dependencies=[Depends(_check_api_key)])
def similar_query(space_name: str, query_request: KnowledgeQueryRequest):
print(f"Received params: {space_name}, {query_request}")
embedding_factory = CFG.SYSTEM_APP.get_component(
@@ -273,7 +274,7 @@ def similar_query(space_name: str, query_request: KnowledgeQueryRequest):
return {"response": res}


@router.post("/knowledge/document/summary")
@router.post("/knowledge/document/summary", dependencies=[Depends(_check_api_key)])
async def document_summary(request: DocumentSummaryRequest):
print(f"/document/summary params: {request}")
try:
@@ -305,7 +306,7 @@ async def document_summary(request: DocumentSummaryRequest):
return Result.failed(code="E000X", msg=f"document summary error {e}")


@router.post("/knowledge/entity/extract")
@router.post("/knowledge/entity/extract", dependencies=[Depends(_check_api_key)])
async def entity_extract(request: EntityExtractRequest):
logger.info(f"Received params: {request}")
try:
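
(With these routes now guarded by `_check_api_key`, a client call might look like the sketch below; the base URL, port, route prefix, and bearer-token scheme are assumptions, not taken from this diff.)

```python
import requests

resp = requests.post(
    "http://127.0.0.1:5000/api/v1/knowledge/space/list",  # assumed host, port, and prefix
    json={},
    headers={"Authorization": "Bearer FAST_API_KEY"},  # value configured in .env
)
print(resp.status_code, resp.json())
```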
1 change: 1 addition & 0 deletions dbgpt/app/knowledge/service.py
@@ -574,6 +574,7 @@ def get_space_context(self, space_name):
request.name = space_name
spaces = self.get_knowledge_space(request)
if len(spaces) != 1:
# TODO: Should not return internal server error here
raise Exception(
f"have not found {space_name} space or found more than one space called {space_name}"
)