Skip to content

Commit

Permalink
feat(ChatKnowledge): ChatKnowledge Support Keyword Retrieve (eosphoros-ai#1624)
Browse files Browse the repository at this point in the history

Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>
  • Loading branch information
2 people authored and Hopshine committed Sep 10, 2024
1 parent c619835 commit ec783c2
Show file tree
Hide file tree
Showing 86 changed files with 951 additions and 443 deletions.
29 changes: 14 additions & 15 deletions dbgpt/agent/core/memory/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import os.path
from concurrent.futures import Executor, ThreadPoolExecutor
from datetime import datetime
from typing import TYPE_CHECKING, Generic, List, Optional, Tuple, Type
from typing import Generic, List, Optional, Tuple, Type

from dbgpt.core import Embeddings, LLMClient
from dbgpt.storage.vector_store.base import VectorStoreBase
from dbgpt.util.annotations import immutable, mutable

from .base import (
Expand All @@ -26,9 +27,6 @@
from .long_term import LongTermMemory
from .short_term import EnhancedShortTermMemory

if TYPE_CHECKING:
from dbgpt.storage.vector_store.connector import VectorStoreConnector


class HybridMemory(Memory, Generic[T]):
"""Hybrid memory for the agent."""
Expand Down Expand Up @@ -81,8 +79,10 @@ def from_chroma(
):
"""Create a hybrid memory from Chroma vector store."""
from dbgpt.configs.model_config import DATA_DIR
from dbgpt.storage.vector_store.chroma_store import ChromaVectorConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.storage.vector_store.chroma_store import (
ChromaStore,
ChromaVectorConfig,
)

if not embeddings:
from dbgpt.rag.embedding import DefaultEmbeddingFactory
Expand All @@ -91,16 +91,15 @@ def from_chroma(

vstore_path = vstore_path or os.path.join(DATA_DIR, "agent_memory")

vector_store_connector = VectorStoreConnector.from_default(
vector_store_type="Chroma",
embedding_fn=embeddings,
vector_store_config=ChromaVectorConfig(
vector_store = ChromaStore(
ChromaVectorConfig(
name=vstore_name,
persist_path=vstore_path,
),
embedding_fn=embeddings,
)
)
return cls.from_vstore(
vector_store_connector=vector_store_connector,
vector_store=vector_store,
embeddings=embeddings,
executor=executor,
now=now,
Expand All @@ -113,7 +112,7 @@ def from_chroma(
@classmethod
def from_vstore(
cls,
vector_store_connector: "VectorStoreConnector",
vector_store: "VectorStoreBase",
embeddings: Optional[Embeddings] = None,
executor: Optional[Executor] = None,
now: Optional[datetime] = None,
Expand All @@ -124,7 +123,7 @@ def from_vstore(
):
"""Create a hybrid memory from vector store."""
if not embeddings:
embeddings = vector_store_connector.current_embeddings
raise ValueError("embeddings is required.")
if not executor:
executor = ThreadPoolExecutor()
if not now:
Expand All @@ -139,7 +138,7 @@ def from_vstore(
if not long_term_memory:
long_term_memory = LongTermMemory(
executor,
vector_store_connector,
vector_store,
now=now,
)
return cls(now, sensory_memory, short_term_memory, long_term_memory, **kwargs)
Expand Down
10 changes: 5 additions & 5 deletions dbgpt/agent/core/memory/long_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from dbgpt.core import Chunk
from dbgpt.rag.retriever.time_weighted import TimeWeightedEmbeddingRetriever
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.storage.vector_store.base import VectorStoreBase
from dbgpt.storage.vector_store.filters import MetadataFilters
from dbgpt.util.annotations import immutable, mutable
from dbgpt.util.executor_utils import blocking_func_to_async
Expand Down Expand Up @@ -70,7 +70,7 @@ class LongTermMemory(Memory, Generic[T]):
def __init__(
self,
executor: Executor,
vector_store_connector: VectorStoreConnector,
vector_store: VectorStoreBase,
now: Optional[datetime] = None,
reflection_threshold: Optional[float] = None,
):
Expand All @@ -81,9 +81,9 @@ def __init__(
self.forgetting: bool = False
self.reflection_threshold: Optional[float] = reflection_threshold
self.aggregate_importance: float = 0.0
self._vector_store_connector = vector_store_connector
self._vector_store = vector_store
self.memory_retriever = LongTermRetriever(
now=self.now, vector_store_connector=vector_store_connector
now=self.now, index_store=vector_store
)

@immutable
Expand All @@ -97,7 +97,7 @@ def structure_clone(
m: LongTermMemory[T] = LongTermMemory(
now=now,
executor=self.executor,
vector_store_connector=self._vector_store_connector.new_connector(new_name),
vector_store=self._vector_store,
reflection_threshold=self.reflection_threshold,
)
m._copy_from(self)
Expand Down
4 changes: 2 additions & 2 deletions dbgpt/app/knowledge/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.service.service import Service
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.util.tracer import SpanType, root_tracer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -310,7 +310,7 @@ def similar_query(space_name: str, query_request: KnowledgeQueryRequest):
vector_store_config=config,
)
retriever = EmbeddingRetriever(
top_k=query_request.top_k, vector_store_connector=vector_store_connector
top_k=query_request.top_k, index_store=vector_store_connector.index_client
)
chunks = retriever.retrieve(query_request.query)
res = [
Expand Down
6 changes: 2 additions & 4 deletions dbgpt/app/knowledge/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
ChunkQueryRequest,
DocumentQueryRequest,
DocumentSummaryRequest,
DocumentSyncRequest,
KnowledgeDocumentRequest,
KnowledgeSpaceRequest,
SpaceArgumentRequest,
Expand All @@ -24,19 +23,18 @@
)
from dbgpt.component import ComponentType
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import Chunk, LLMClient
from dbgpt.core import LLMClient
from dbgpt.model import DefaultLLMClient
from dbgpt.model.cluster import WorkerManagerFactory
from dbgpt.rag.assembler.embedding import EmbeddingAssembler
from dbgpt.rag.assembler.summary import SummaryAssembler
from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.knowledge.base import KnowledgeType
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao, KnowledgeSpaceEntity
from dbgpt.serve.rag.service.service import SyncStatus
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.util.executor_utils import ExecutorFactory, blocking_func_to_async
from dbgpt.util.tracer import root_tracer, trace

Expand Down
4 changes: 2 additions & 2 deletions dbgpt/app/scene/chat_knowledge/v1/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def __init__(self, chat_param: Dict):
"embedding_factory", EmbeddingFactory
)
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.serve.rag.connector import VectorStoreConnector

embedding_fn = embedding_factory.create(
model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL]
Expand Down Expand Up @@ -116,7 +116,7 @@ def __init__(self, chat_param: Dict):
retriever_top_k = max(CFG.RERANK_TOP_K, 20)
self.embedding_retriever = EmbeddingRetriever(
top_k=retriever_top_k,
vector_store_connector=vector_store_connector,
index_store=vector_store_connector.index_client,
query_rewrite=query_rewrite,
rerank=reranker,
)
Expand Down
2 changes: 1 addition & 1 deletion dbgpt/app/static/404.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dbgpt/app/static/404/index.html

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Large diffs are not rendered by default.

Loading

0 comments on commit ec783c2

Please sign in to comment.