Skip to content

Commit

Permalink
fix csvagent bug (#41)
Browse files Browse the repository at this point in the history
1. 修复csvAgent bug
2. 优化HostEmbedding等node
  • Loading branch information
yaojin3616 committed Sep 18, 2023
2 parents b3f5303 + ccf523d commit 906d9b8
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 125 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ jobs:
username: ${{ secrets.NEXUS_USER }}
password: ${{ secrets.NEXUS_PASSWORD }}

- name: Login docker hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

# 替换poetry编译为私有服务
- name: replace self-host repo
uses: snok/install-poetry@v1
Expand All @@ -71,7 +77,7 @@ jobs:
- name: build lock
run: |
cd ./src/backend
poetry source add --priority=default foo http://${{ secrets.NEXUS_PUBLIC }}:${{ secrets.NEXUS_PUBLIC_PASSWORD }}@${{ env.PY_NEXUS }}/repository/pypi-group/simple
poetry source add --priority=supplemental foo http://${{ secrets.NEXUS_PUBLIC }}:${{ secrets.NEXUS_PUBLIC_PASSWORD }}@${{ env.PY_NEXUS }}/repository/pypi-group/simple
poetry lock
cd ../../
Expand All @@ -91,6 +97,7 @@ jobs:
# 生成两个 docker tag: 私有 Nexus 仓库与 Docker Hub 上的 release
tags: |
${{ env.DOCKER_NEXUS }}/${{ env.DOCKERHUB_REPO }}bisheng-backend:release
${{ env.DOCKERHUB_REPO }}bisheng-backend:release
# 构建 Docker frontend 并推送到 Docker hub
- name: Build frontend and push
id: docker_build_frontend
Expand All @@ -107,5 +114,6 @@ jobs:
# 生成两个 docker tag: 私有 Nexus 仓库与 Docker Hub 上的 release
tags: |
${{ env.DOCKER_NEXUS }}/${{ env.DOCKERHUB_REPO }}bisheng-frontend:release
${{ env.DOCKERHUB_REPO }}bisheng-frontend:release
7 changes: 5 additions & 2 deletions docker/bisheng/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ admin:
# bisheng-rt服务地址
bisheng-rt:
name: "RT-Server"
server: "127.0.0.1:9001"
server: "192.168.0.1:9001"

# 为知识库的embedding进行模型配置
knowledges:
Expand All @@ -25,7 +25,10 @@ knowledges:
# Milvus 最低要求cpu 4C 8G 推荐4C 16G
Milvus: # 如果需要切换其他vectordb,确保其他服务已经启动,然后配置对应参数
connection_args: {'host': '110.16.193.170', 'port': '50032', 'user': '', 'password': '', 'secure': False}

ElasticKeywordsSearch:
elasticsearch_url: 'https://192.168.106.14:9200'
ssl_verify: {'ca_certs': False, 'basic_auth': "('elastic', 'F94h5JtdQn6EQB-G9Hjv')", 'verify_certs': False}

agents:
ZeroShotAgent:
documentation: "https://python.langchain.com/docs/modules/agents/how_to/custom_mrkl_agent"
Expand Down
44 changes: 22 additions & 22 deletions src/backend/bisheng/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def build_flow(graph_data: dict, artifacts, process_file=False, flow_id=None, ch
'log': f'Building node {vertex.vertex_type}',
}
yield str(StreamData(event='log', data=log_dict))
# 如果存在文件,当前不操作文件,避免重复操作
if not process_file:
template_dict = {
key: value
for key, value in vertex.data['node']['template'].items()
if isinstance(value, dict)
}
for key, value in template_dict.items():
if value.get('type') == 'file':
# 过滤掉文件
vertex.params[key] = ''
# # 如果存在文件,当前不操作文件,避免重复操作
# if not process_file and chat_id is not None:
# template_dict = {
# key: value
# for key, value in vertex.data['node']['template'].items()
# if isinstance(value, dict)
# }
# for key, value in template_dict.items():
# if value.get('type') == 'file':
# # 过滤掉文件
# vertex.params[key] = ''

# vector store 引入自动建库逻辑
# 聊天窗口等flow 主动生成的vector 需要新建临时collection
Expand Down Expand Up @@ -139,17 +139,17 @@ def build_flow_no_yield(graph_data: dict,

for i, vertex in enumerate(graph.generator_build(), 1):
try:
# 如果存在文件,当前不操作文件,避免重复操作
if not process_file:
template_dict = {
key: value
for key, value in vertex.data['node']['template'].items()
if isinstance(value, dict)
}
for key, value in template_dict.items():
if value.get('type') == 'file':
# 过滤掉文件
vertex.params[key] = ''
# # 如果存在文件,当前不操作文件,避免重复操作
# if not process_file:
# template_dict = {
# key: value
# for key, value in vertex.data['node']['template'].items()
# if isinstance(value, dict)
# }
# for key, value in template_dict.items():
# if value.get('type') == 'file':
# # 过滤掉文件
# vertex.params[key] = ''

# vector store 引入自动建库逻辑
# 聊天窗口等flow 主动生成的vector 需要新建临时collection
Expand Down
122 changes: 56 additions & 66 deletions src/backend/bisheng/interface/agents/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,15 @@

from bisheng.interface.base import CustomAgentExecutor
from langchain import LLMChain
from langchain.agents import (AgentExecutor, AgentType, Tool, ZeroShotAgent,
initialize_agent)
from langchain.agents.agent_toolkits import (SQLDatabaseToolkit,
VectorStoreInfo,
VectorStoreRouterToolkit,
VectorStoreToolkit)
from langchain.agents.agent_toolkits.json.prompt import (JSON_PREFIX,
JSON_SUFFIX)
from langchain.agents import AgentExecutor, AgentType, Tool, ZeroShotAgent, initialize_agent
from langchain.agents.agent_toolkits import (SQLDatabaseToolkit, VectorStoreInfo,
VectorStoreRouterToolkit, VectorStoreToolkit)
from langchain.agents.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.pandas.prompt import \
PREFIX as PANDAS_PREFIX
from langchain.agents.agent_toolkits.pandas.prompt import \
SUFFIX_WITH_DF as PANDAS_SUFFIX
from langchain.agents.agent_toolkits.pandas.prompt import PREFIX as PANDAS_PREFIX
from langchain.agents.agent_toolkits.pandas.prompt import SUFFIX_WITH_DF as PANDAS_SUFFIX
from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX
from langchain.agents.agent_toolkits.vectorstore.prompt import \
PREFIX as VECTORSTORE_PREFIX
from langchain.agents.agent_toolkits.vectorstore.prompt import PREFIX as VECTORSTORE_PREFIX
from langchain.agents.agent_toolkits.vectorstore.prompt import \
ROUTER_PREFIX as VECTORSTORE_ROUTER_PREFIX
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
Expand Down Expand Up @@ -58,7 +51,8 @@ def from_toolkit_and_llm(cls, toolkit: JsonToolkit, llm: BaseLanguageModel):
prompt=prompt,
)
agent = ZeroShotAgent(
llm_chain=llm_chain, allowed_tools=tool_names # type: ignore
llm_chain=llm_chain,
allowed_tools=tool_names # type: ignore
)
return cls.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

Expand All @@ -82,16 +76,15 @@ def __init__(self, *args, **kwargs):

@classmethod
def from_toolkit_and_llm(
cls,
path: str,
llm: BaseLanguageModel,
pandas_kwargs: Optional[dict] = None,
prefix: str =PANDAS_PREFIX,
suffix: str=PANDAS_SUFFIX,
format_instructions: str = FORMAT_INSTRUCTIONS,
input_variables: Optional[List[str]]=['df_head', 'input', 'agent_scratchpad'],
**kwargs: Any
):
cls,
path: str,
llm: BaseLanguageModel,
pandas_kwargs: Optional[dict] = None,
prefix: str = PANDAS_PREFIX,
suffix: str = PANDAS_SUFFIX,
format_instructions: str = FORMAT_INSTRUCTIONS,
input_variables: Optional[List[str]] = ['df_head', 'input', 'agent_scratchpad'],
**kwargs: Any):
import pandas as pd # type: ignore

_kwargs = pandas_kwargs or {}
Expand All @@ -102,7 +95,7 @@ def from_toolkit_and_llm(
tools,
prefix=prefix,
suffix=suffix,
format_instructions= format_instructions,
format_instructions=format_instructions,
input_variables=input_variables,
)
partial_prompt = prompt.partial(df_head=str(df.head()))
Expand All @@ -112,7 +105,9 @@ def from_toolkit_and_llm(
)
tool_names = {tool.name for tool in tools}
agent = ZeroShotAgent(
llm_chain=llm_chain, allowed_tools=tool_names, **kwargs # type: ignore
llm_chain=llm_chain,
allowed_tools=tool_names,
**kwargs # type: ignore
)

return cls.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
Expand All @@ -136,9 +131,8 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@classmethod
def from_toolkit_and_llm(
cls, llm: BaseLanguageModel, vectorstoreinfo: VectorStoreInfo, **kwargs: Any
):
def from_toolkit_and_llm(cls, llm: BaseLanguageModel, vectorstoreinfo: VectorStoreInfo,
**kwargs: Any):
"""Construct a vectorstore agent from an LLM and tools."""

toolkit = VectorStoreToolkit(vectorstore_info=vectorstoreinfo, llm=llm)
Expand All @@ -151,11 +145,14 @@ def from_toolkit_and_llm(
)
tool_names = {tool.name for tool in tools}
agent = ZeroShotAgent(
llm_chain=llm_chain, allowed_tools=tool_names, **kwargs # type: ignore
)
return AgentExecutor.from_agent_and_tools(
agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
llm_chain=llm_chain,
allowed_tools=tool_names,
**kwargs # type: ignore
)
return AgentExecutor.from_agent_and_tools(agent=agent,
tools=tools,
verbose=True,
handle_parsing_errors=True)

def run(self, *args, **kwargs):
return super().run(*args, **kwargs)
Expand All @@ -176,17 +173,15 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@classmethod
def from_toolkit_and_llm(
cls,
llm: BaseLanguageModel,
database_uri: str,
top_k: int = 10,
prefix: str = SQL_PREFIX,
suffix: str=SQL_SUFFIX,
format_instructions:str=FORMAT_INSTRUCTIONS,
input_variables: Optional[List[str]] = ['input', 'agent_scratchpad'],
**kwargs: Any
):
def from_toolkit_and_llm(cls,
llm: BaseLanguageModel,
database_uri: str,
top_k: int = 10,
prefix: str = SQL_PREFIX,
suffix: str = SQL_SUFFIX,
format_instructions: str = FORMAT_INSTRUCTIONS,
input_variables: Optional[List[str]] = ['input', 'agent_scratchpad'],
**kwargs: Any):
"""Construct an SQL agent from an LLM and tools."""
db = SQLDatabase.from_uri(database_uri)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
Expand All @@ -204,9 +199,7 @@ def from_toolkit_and_llm(

llmchain = LLMChain(
llm=llm,
prompt=PromptTemplate(
template=QUERY_CHECKER, input_variables=['query', 'dialect']
),
prompt=PromptTemplate(template=QUERY_CHECKER, input_variables=['query', 'dialect']),
)

tools = [
Expand All @@ -222,15 +215,17 @@ def from_toolkit_and_llm(
prefix=prefix,
suffix=suffix,
format_instructions=format_instructions,
input_variables = input_variables,
input_variables=input_variables,
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
)
tool_names = {tool.name for tool in tools} # type: ignore
agent = ZeroShotAgent(
llm_chain=llm_chain, allowed_tools=tool_names, **kwargs # type: ignore
llm_chain=llm_chain,
allowed_tools=tool_names,
**kwargs # type: ignore
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
Expand Down Expand Up @@ -260,31 +255,27 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@classmethod
def from_toolkit_and_llm(
cls,
llm: BaseLanguageModel,
vectorstoreroutertoolkit: VectorStoreRouterToolkit,
**kwargs: Any
):
def from_toolkit_and_llm(cls, llm: BaseLanguageModel,
vectorstoreroutertoolkit: VectorStoreRouterToolkit, **kwargs: Any):
"""Construct a vector store router agent from an LLM and tools."""

tools = (
vectorstoreroutertoolkit
if isinstance(vectorstoreroutertoolkit, list)
else vectorstoreroutertoolkit.get_tools()
)
tools = (vectorstoreroutertoolkit if isinstance(vectorstoreroutertoolkit, list) else
vectorstoreroutertoolkit.get_tools())
prompt = ZeroShotAgent.create_prompt(tools, prefix=VECTORSTORE_ROUTER_PREFIX)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
)
tool_names = {tool.name for tool in tools}
agent = ZeroShotAgent(
llm_chain=llm_chain, allowed_tools=tool_names, **kwargs # type: ignore
)
return AgentExecutor.from_agent_and_tools(
agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
llm_chain=llm_chain,
allowed_tools=tool_names,
**kwargs # type: ignore
)
return AgentExecutor.from_agent_and_tools(agent=agent,
tools=tools,
verbose=True,
handle_parsing_errors=True)

def run(self, *args, **kwargs):
return super().run(*args, **kwargs)
Expand All @@ -304,7 +295,6 @@ def initialize(
tools: List[Tool],
agent: str,
memory: Optional[BaseChatMemory] = None,

):
# Find which value in the AgentType enum corresponds to the string
# passed in as agent
Expand Down
29 changes: 21 additions & 8 deletions src/backend/bisheng/interface/custom_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
llm_type_to_cls_dict['vertexai-chat'] = ChatVertexAI # type: ignore

# llm contribute
llm_type_to_cls_dict.update(
{llm_name: import_class(f'bisheng_langchain.chat_models.{llm_name}') for llm_name in chat_models.__all__})
llm_type_to_cls_dict.update({
llm_name: import_class(f'bisheng_langchain.chat_models.{llm_name}')
for llm_name in chat_models.__all__
})

# Toolkits
toolkit_type_to_loader_dict: dict[str, Any] = {
Expand All @@ -45,11 +47,14 @@
}

# Wrappers
wrapper_type_to_cls_dict: dict[str, Any] = {wrapper.__name__: wrapper for wrapper in [requests.RequestsWrapper]}
wrapper_type_to_cls_dict: dict[str, Any] = {
wrapper.__name__: wrapper for wrapper in [requests.RequestsWrapper]
}

# Embeddings
embedding_type_to_cls_dict: dict[str, Any] = {
embedding_name: import_class(f'langchain.embeddings.{embedding_name}') for embedding_name in embeddings.__all__
embedding_name: import_class(f'langchain.embeddings.{embedding_name}')
for embedding_name in embeddings.__all__
}

embedding_type_to_cls_dict.update({
Expand All @@ -64,11 +69,19 @@
}

# contribute
documentloaders_type_to_cls_dict.update(
{loader: import_class(f'bisheng_langchain.document_loaders.{loader}') for loader in contribute_loader.__all__})
documentloaders_type_to_cls_dict.update({
loader: import_class(f'bisheng_langchain.document_loaders.{loader}')
for loader in contribute_loader.__all__
})

# Text Splitters
textsplitter_type_to_cls_dict: dict[str, Any] = dict(inspect.getmembers(text_splitter, inspect.isclass))
textsplitter_type_to_cls_dict: dict[str,
Any] = dict(inspect.getmembers(text_splitter, inspect.isclass))

# merge CUSTOM_AGENTS and CUSTOM_CHAINS
CUSTOM_NODES = {**CUSTOM_AGENTS, **CUSTOM_CHAINS, **CUSTOM_EMBEDDING, **CUSTOM_INPUTOUTPUT} # type: ignore
CUSTOM_NODES = {
**CUSTOM_AGENTS,
**CUSTOM_CHAINS,
**CUSTOM_EMBEDDING,
**CUSTOM_INPUTOUTPUT
} # type: ignore
Loading

0 comments on commit 906d9b8

Please sign in to comment.