Skip to content

Commit

Permalink
add feat 0.1.7
Browse files Browse the repository at this point in the history
预发布0.1.7
1. 支持知识库问答溯源文件
2. 增加图片进入知识库
3. mixESretriever 组件支持
4. InputFileNode 支持动态判断当前文件格式
5. bug fix
  • Loading branch information
yaojin3616 committed Oct 16, 2023
2 parents 7f57de9 + d6a7cc4 commit f7dba6d
Show file tree
Hide file tree
Showing 84 changed files with 2,397 additions and 19,250 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
pip install wheel
pip install twine
cd ./src/bisheng-langchain
echo "v0.0.0" > version.txt
python setup.py bdist_wheel
repo="http://${{ env.PY_NEXUS }}/repository/pypi-hosted/"
twine upload --verbose -u ${{ secrets.NEXUS_USER }} -p ${{ secrets.NEXUS_PASSWORD }} --repository-url $repo dist/*.whl
Expand Down Expand Up @@ -79,6 +80,7 @@ jobs:
run: |
cd ./src/backend
poetry source add --priority=supplemental foo http://${{ secrets.NEXUS_PUBLIC }}:${{ secrets.NEXUS_PUBLIC_PASSWORD }}@${{ env.PY_NEXUS }}/repository/pypi-group/simple
sed -i '' 's/^bisheng_langchain.*/bisheng_langchain = "0.0.0"/g' pyproject.toml
poetry lock
cd ../../
Expand Down
32 changes: 24 additions & 8 deletions docker/bisheng/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,35 @@ admin:
# server: "192.168.0.1:9001"

# 为知识库的embedding进行模型配置
knowledges:
embeddings:
text-embedding-ada-002:
openai_api_base: "https://api.openai.com/v1/"
openai_proxy: ""
openai_api_key: ""
knowledges: # 知识库相关配置
unstructured_api_url: "" # 毕昇非结构化数据解析服务地址,提供包括OCR文字识别、表格识别、版式分析等能力。非必填,填写后能够提升PDF、图片等文件的解析效果。
embeddings: # 配置知识库的embedding服务,以下示例填写了两类embedding服务的配置方法,第一个是openai的embedding模型服务的配置方法,第二个是本地部署的embedding模型服务的配置方法,如果有多个可以添加多个
text-embedding-ada-002: # 知识库下拉框中显示的embedding模型的名称,可自定义
openai_api_base: "https://api.openai.com/v1"
openai_proxy: "" # 如果是自己代理的服务地址,则填在这里
openai_api_key: "" # 私有的,openai账号的key
embedding-host: # 知识库下拉框中显示的embedding模型的名称,可自定义
host_base_url: "" # 在模型管理页面中已上线的embedding服务的地址
model: "" # 在模型管理页面中已上线的embedding模型的名称
vectorstores:
# Milvus 最低要求cpu 4C 8G 推荐4C 16G
Milvus: # 如果需要切换其他vectordb,确保其他服务已经启动,然后配置对应参数
connection_args: {'host': '110.16.193.170', 'port': '50032', 'user': '', 'password': '', 'secure': False}
# 可选配置,有些类型的场景使用ES可以提高召回效果
ElasticKeywordsSearch:
elasticsearch_url: 'https://192.168.106.14:9200'
ssl_verify: {'ca_certs': False, 'basic_auth': "('elastic', 'F94h5JtdQn6EQB-G9Hjv')", 'verify_certs': False}
elasticsearch_url: ""
ssl_verify: "{'ca_certs': False, 'basic_auth': ('elastic', 'password'), 'verify_certs': False}"
minio: # 如果要支持溯源功能,由于溯源会展示源文件,必须配置 oss 存储
MINIO_ENDPOINT: ""
MINIO_SHAREPOIN: ""
MINIO_ACCESS_KEY: ""
MINIO_SECRET_KEY: ""

# 全局配置大模型
default_llm: # 可选配置。BISHENG系统中有些功能需要使用大模型的能力,当前问答溯源功能中会用到,未来还会有其他功能会使用到。在问答溯源功能中,使用大语言模型自动从答案中提取关键词,来帮助用户快速定位到答案的可能来源段落,如果这里没有配置,则会使用jieba分词来输出答案中的关键词。
model: "" # 在模型管理页面中已上线的大模型服务的名称
host_base_url: "" # 在模型管理页面中已上线的大模型服务的地址


agents:
ZeroShotAgent:
Expand Down
11 changes: 5 additions & 6 deletions src/backend/bisheng/api/router.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# Router for base api
from bisheng.api.v1 import (chat_router, endpoints_router, flow_styles_router,
flows_router, knowledge_router, server_router,
skillcenter_router, user_router, validate_router)
from bisheng.api.v1 import (chat_router, endpoints_router, flow_styles_router, flows_router,
knowledge_router, qa_router, server_router, skillcenter_router,
user_router, validate_router)
from fastapi import APIRouter

router = APIRouter(
prefix='/api/v1',
)
router = APIRouter(prefix='/api/v1',)
router.include_router(chat_router)
router.include_router(endpoints_router)
router.include_router(validate_router)
Expand All @@ -16,3 +14,4 @@
router.include_router(knowledge_router)
router.include_router(server_router)
router.include_router(user_router)
router.include_router(qa_router)
45 changes: 24 additions & 21 deletions src/backend/bisheng/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,16 @@ def build_flow(graph_data: dict, artifacts, process_file=False, flow_id=None, ch
}
yield str(StreamData(event='log', data=log_dict))
# # 如果存在文件,当前不操作文件,避免重复操作
# if not process_file and chat_id is not None:
# template_dict = {
# key: value
# for key, value in vertex.data['node']['template'].items()
# if isinstance(value, dict)
# }
# for key, value in template_dict.items():
# if value.get('type') == 'file':
# # 过滤掉文件
# vertex.params[key] = ''
if not process_file and chat_id is not None:
template_dict = {
key: value
for key, value in vertex.data['node']['template'].items()
if isinstance(value, dict)
}
for key, value in template_dict.items():
if value.get('type') == 'file':
# 过滤掉文件
vertex.params[key] = ''

# vectore store 引入自动建库逻辑
# 聊天窗口等flow 主动生成的vector 需要新建临时collection
Expand Down Expand Up @@ -139,17 +139,17 @@ def build_flow_no_yield(graph_data: dict,

for i, vertex in enumerate(graph.generator_build(), 1):
try:
# # 如果存在文件,当前不操作文件,避免重复操作
# if not process_file:
# template_dict = {
# key: value
# for key, value in vertex.data['node']['template'].items()
# if isinstance(value, dict)
# }
# for key, value in template_dict.items():
# if value.get('type') == 'file':
# # 过滤掉文件
# vertex.params[key] = ''
# 如果存在文件,当前不操作文件,避免重复操作
if not process_file and vertex.base_type == 'documentloaders':
template_dict = {
key: value
for key, value in vertex.data['node']['template'].items()
if isinstance(value, dict)
}
for key, value in template_dict.items():
if value.get('type') == 'fileNode':
# 过滤掉文件
vertex.params[key] = ''

# vectore store 引入自动建库逻辑
# 聊天窗口等flow 主动生成的vector 需要新建临时collection
Expand All @@ -158,6 +158,9 @@ def build_flow_no_yield(graph_data: dict,
if 'collection_name' in vertex.params and not vertex.params.get('collection_name'):
vertex.params['collection_name'] = f'tmp_{flow_id}_{chat_id}'
logger.info(f"rename_vector_col col={vertex.params['collection_name']}")
if process_file:
# L1 清除Milvus历史记录
vertex.params['drop_old'] = True

vertex.build()
params = vertex._built_object_repr()
Expand Down
2 changes: 2 additions & 0 deletions src/backend/bisheng/api/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from bisheng.api.v1.flow_styles import router as flow_styles_router
from bisheng.api.v1.flows import router as flows_router
from bisheng.api.v1.knowledge import router as knowledge_router
from bisheng.api.v1.qa import router as qa_router
from bisheng.api.v1.server import router as server_router
from bisheng.api.v1.skillcenter import router as skillcenter_router
from bisheng.api.v1.user import router as user_router
Expand All @@ -18,4 +19,5 @@
'knowledge_router',
'server_router',
'user_router',
'qa_router',
]
8 changes: 6 additions & 2 deletions src/backend/bisheng/api/v1/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def get_chatlist_list(*, session: Session = Depends(get_session), Authorize: Aut
func.max(ChatMessage.create_time).label('create_time'),
func.max(ChatMessage.update_time).label('update_time')).where(
ChatMessage.user_id == payload.get('user_id')).group_by(
ChatMessage.flow_id, ChatMessage.chat_id).order_by(func.max(ChatMessage.create_time).desc()))
ChatMessage.flow_id,
ChatMessage.chat_id).order_by(func.max(ChatMessage.create_time).desc()))
db_message = session.exec(smt).all()
flow_ids = [message.flow_id for message in db_message]
db_flow = session.exec(select(Flow).where(Flow.id.in_(flow_ids))).all()
Expand Down Expand Up @@ -100,10 +101,13 @@ async def chat(client_id: str,
graph_data = db_flow.data
else:
flow_data_key = 'flow_data_' + client_id
if str(flow_data_store.hget(flow_data_key, 'status'), 'utf-8') != BuildStatus.SUCCESS.value:
if not flow_data_store.exists(flow_data_key) or str(
flow_data_store.hget(flow_data_key, 'status'),
'utf-8') != BuildStatus.SUCCESS.value:
await websocket.accept()
message = '当前编译没通过'
await websocket.close(code=status.WS_1013_TRY_AGAIN_LATER, reason=message)
return
graph_data = json.loads(flow_data_store.hget(flow_data_key, 'graph_data'))

try:
Expand Down
18 changes: 13 additions & 5 deletions src/backend/bisheng/api/v1/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
from typing import List
from uuid import UUID

from sqlalchemy import func

from bisheng.api.utils import build_flow_no_yield, remove_api_keys
from bisheng.api.v1.schemas import FlowListCreate, FlowListRead
from bisheng.database.base import get_session
Expand All @@ -13,14 +11,18 @@
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
from fastapi.encoders import jsonable_encoder
from fastapi_jwt_auth import AuthJWT
from sqlalchemy import func
from sqlmodel import Session, select

# build router
router = APIRouter(prefix='/flows', tags=['Flows'])


@router.post('/', response_model=FlowRead, status_code=201)
def create_flow(*, session: Session = Depends(get_session), flow: FlowCreate, Authorize: AuthJWT = Depends()):
def create_flow(*,
session: Session = Depends(get_session),
flow: FlowCreate,
Authorize: AuthJWT = Depends()):
"""Create a new flow."""
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
Expand Down Expand Up @@ -124,7 +126,10 @@ def update_flow(*,


@router.delete('/{flow_id}', status_code=200)
def delete_flow(*, session: Session = Depends(get_session), flow_id: UUID, Authorize: AuthJWT = Depends()):
def delete_flow(*,
session: Session = Depends(get_session),
flow_id: UUID,
Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
"""Delete a flow."""
Expand All @@ -141,7 +146,10 @@ def delete_flow(*, session: Session = Depends(get_session), flow_id: UUID, Autho

# Define a new model to handle multiple flows
@router.post('/batch/', response_model=List[FlowRead], status_code=201)
def create_flows(*, session: Session = Depends(get_session), flow_list: FlowListCreate, Authorize: AuthJWT = Depends()):
def create_flows(*,
session: Session = Depends(get_session),
flow_list: FlowListCreate,
Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
"""Create multiple new flows."""
Expand Down
Loading

0 comments on commit f7dba6d

Please sign in to comment.