Skip to content

Commit

Permalink
Feat/0.2.2.3 (#294)
Browse files Browse the repository at this point in the history
稳定版本
  • Loading branch information
yaojin3616 authored Jan 24, 2024
2 parents 5998e14 + 030a0fa commit d45934b
Show file tree
Hide file tree
Showing 35 changed files with 248 additions and 132 deletions.
11 changes: 11 additions & 0 deletions src/backend/bisheng/api/services/chat_imp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from bisheng.api.v1.schemas import ChatMessage
from bisheng.database.base import session_getter


def comment_answer(message_id: int, comment: str):
    """Store a user comment on a chat message as its ``remark``.

    Looks up the ``ChatMessage`` row whose primary key is ``message_id``; when
    it exists, the comment is written to ``remark`` (truncated to 4096
    characters) and the session is committed. An unknown ``message_id`` is
    ignored without error.

    Args:
        message_id: Primary key of the chat message to annotate.
        comment: Comment text. ``None`` is stored unchanged rather than
            raising ``TypeError`` on the slice.
    """
    # Upper bound on the stored remark; longer comments are cut off.
    # NOTE(review): presumably matches the remark column size - confirm.
    max_remark_length = 4096
    with session_getter() as session:
        message = session.get(ChatMessage, message_id)
        if not message:
            return
        # Only real strings can be sliced; pass None through untouched.
        message.remark = comment[:max_remark_length] if comment is not None else None
        session.add(message)
        session.commit()
10 changes: 3 additions & 7 deletions src/backend/bisheng/api/v1/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
from typing import List, Optional
from uuid import UUID

from bisheng.api.services.chat_imp import comment_answer
from bisheng.api.utils import build_flow, build_input_keys_response
from bisheng.api.v1.schemas import (BuildStatus, BuiltResponse, ChatInput, ChatList, InitResponse,
StreamData, UnifiedResponseModel, resp_200)
from bisheng.cache.redis import redis_client
from bisheng.chat.manager import ChatManager
from bisheng.database.base import get_session, session_getter
from bisheng.database.base import get_session
from bisheng.database.models.flow import Flow
from bisheng.database.models.message import ChatMessage, ChatMessageRead
from bisheng.graph.graph.base import Graph
Expand Down Expand Up @@ -85,12 +86,7 @@ def like_response(*,
@router.post('/chat/comment', status_code=200)
def comment_resp(*, data: ChatInput, Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
with session_getter() as session:
message = session.get(ChatMessage, data.message_id)
if message:
message.remark = data.comment
session.add(message)
session.commit()
comment_answer(data.message_id, data.comment)
return resp_200(message='操作成功')


Expand Down
6 changes: 5 additions & 1 deletion src/backend/bisheng/api/v1/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ def create_flow(*,
"""Create a new flow."""
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())

# 判断用户是否重复技能名
if session.exec(
select(Flow).where(Flow.name == flow.name,
Flow.user_id == payload.get('user_id'))).first():
raise HTTPException(status_code=500, detail='技能名重复')
flow.user_id = payload.get('user_id')
db_flow = Flow.model_validate(flow)
session.add(db_flow)
Expand Down
31 changes: 23 additions & 8 deletions src/backend/bisheng/api/v1/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def create_knowledge(*,
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
""" 创建知识库. """
knowledge.is_partition = knowledge.is_partition or settings.vectorstores.get('Milvus', {}).get(
'is_partition', True)
knowledge.is_partition = knowledge.is_partition or settings.get_knowledge().get(
'vectorstores', {}).get('Milvus', {}).get('is_partition', True)
db_knowldge = Knowledge.model_validate(knowledge)
know = session.exec(
select(Knowledge).where(Knowledge.name == knowledge.name,
Expand Down Expand Up @@ -302,7 +302,7 @@ def delete_knowledge(*,
if not access_check(payload, knowledge.user_id, knowledge_id, AccessType.KNOWLEDGE_WRITE):
raise HTTPException(status_code=404, detail='没有权限执行操作')
# 处理vector
embeddings = decide_embeddings(knowledge.model)
embeddings = FakeEmbedding()
vectore_client = decide_vectorstores(knowledge.collection_name, 'Milvus', embeddings)
if vectore_client.col:
logger.info(f'drop_vectore col={knowledge.collection_name}')
Expand All @@ -312,6 +312,10 @@ def delete_knowledge(*,
pk = vectore_client.col.query(expr=f'knowledge_id=="{knowledge.id}"',
output_fields=['pk'])
vectore_client.col.delete(f"pk in {[p['pk'] for p in pk]}")
# 判断milvus 是否还有entity
if vectore_client.col.num_entities == 0:
vectore_client.col.drop()

# 处理 es
# elastic
esvectore_client: 'ElasticKeywordsSearch' = decide_vectorstores(knowledge.index_name,
Expand Down Expand Up @@ -352,7 +356,8 @@ def delete_knowledge_file(*, file_id: int, Authorize: AuthJWT = Depends()):
# minio
minio_client = MinioClient()
minio_client.delete_minio(str(knowledge_file.id))
minio_client.delete_minio(str(knowledge_file.object_name))
if knowledge_file.object_name:
minio_client.delete_minio(str(knowledge_file.object_name))
# elastic
index_name = knowledge.index_name or collection_name
esvectore_client = decide_vectorstores(index_name, 'ElasticKeywordsSearch', embeddings)
Expand Down Expand Up @@ -391,6 +396,8 @@ def decide_vectorstores(collection_name: str, vector_store: str,
else:
param = {'collection_name': collection_name, 'embedding': embedding}
vector_config.pop('partition_suffix', '')
vector_config.pop('is_partition', '')

param.update(vector_config)
class_obj = import_vectorstore(vector_store)
return instantiate_vectorstore(class_object=class_obj, params=param)
Expand Down Expand Up @@ -480,7 +487,7 @@ def addEmbedding(collection_name, index_name, knowledge_id: int, model: str, chu
knowledge_file.file_name, knowledge_file.id,
time.time() - ts1)
except Exception as e:
logger.error(e)
logger.error('insert_metadata={} ', metadatas, e)
session = next(get_session())
db_file = session.get(KnowledgeFile, knowledge_file.id)
setattr(db_file, 'status', 3)
Expand Down Expand Up @@ -518,7 +525,7 @@ def _read_chunk_text(input_file, file_name, size, chunk_overlap, separator):
raw_texts = [t.page_content for t in texts]
metadatas = [{
'bbox': json.dumps({'chunk_bboxes': t.metadata.get('chunk_bboxes', '')}),
'page': t.metadata.get('page'),
'page': t.metadata.get('page') or 0,
'source': file_name,
'extra': ''
} for t in texts]
Expand Down Expand Up @@ -585,6 +592,7 @@ def file_knowledge(
status=1,
object_name=metadata_extra.get('url'))
session.add(db_file)
result = db_file.model_dump()
session.flush()

try:
Expand All @@ -602,6 +610,7 @@ def file_knowledge(
if es_client:
es_client.add_texts(texts=raw_texts, metadatas=metadata)
db_file.status = 2
result['status'] = 2
session.commit()

except Exception as e:
Expand All @@ -610,6 +619,9 @@ def file_knowledge(
setattr(db_file, 'remark', str(e)[:500])
session.add(db_file)
session.commit()
result['status'] = 3
result['remark'] = str(e)[:500]
return result


def text_knowledge(
Expand Down Expand Up @@ -647,7 +659,7 @@ def text_knowledge(
object_name=documents[0].metadata.get('url'))
session.add(db_file)
session.flush()

result = db_file.model_dump()
try:
metadata = [{
'file_id': db_file.id,
Expand All @@ -663,11 +675,14 @@ def text_knowledge(
if es_client:
es_client.add_texts(texts=[t.page_content for t in texts], metadatas=metadata)
db_file.status = 2
result['status'] = 2
session.commit()

except Exception as e:
logger.error(e)
setattr(db_file, 'status', 3)
setattr(db_file, 'remark', str(e)[:500])
session.add(db_file)
session.commit()
result['status'] = 3
result['remark'] = str(e)[:500]
return result
4 changes: 2 additions & 2 deletions src/backend/bisheng/api/v1/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ async def get_info(session: Session = Depends(get_session), Authorize: AuthJWT =
async def logout(Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
Authorize.unset_jwt_cookies()
return {'msg': 'Successfully logout'}
return resp_200()


@router.get('/user/list', status_code=201)
Expand Down Expand Up @@ -256,7 +256,7 @@ async def get_role(*, session: Session = Depends(get_session), Authorize: AuthJW
raise HTTPException(status_code=500, detail='无查看权限')
# 默认不返回 管理员和普通用户,因为用户无法设置
db_role = session.exec(select(Role).where(Role.id > 1)).all()
return {'data': [jsonable_encoder(role) for role in db_role]}
return resp_200([jsonable_encoder(role) for role in db_role])


@router.delete('/role/{role_id}', status_code=200)
Expand Down
24 changes: 17 additions & 7 deletions src/backend/bisheng/api/v2/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from typing import Optional
from uuid import uuid4

from bisheng.api.services.chat_imp import comment_answer
from bisheng.api.v1.schemas import ChatInput, resp_200
from bisheng.cache.redis import redis_client
from bisheng.chat.manager import ChatManager
from bisheng.database.base import get_session, session_getter
Expand Down Expand Up @@ -44,13 +46,15 @@ async def union_websocket(flow_id: str,
tweak = json.loads(tweak)
graph_data = process_tweaks(graph_data, tweak)
# vectordatabase update
for node in graph_data['nodes']:
if 'VectorStore' in node['data']['node']['base_classes']:
if 'collection_name' in node['data'].get('node').get('template').keys():
node['data']['node']['template']['collection_name'][
'collection_id'] = knowledge_id
if 'index_name' in node['data'].get('node').get('template').keys():
node['data']['node']['template']['index_name']['collection_id'] = knowledge_id
if knowledge_id:
for node in graph_data['nodes']:
if 'VectorStore' in node['data']['node']['base_classes']:
if 'collection_name' in node['data'].get('node').get('template').keys():
node['data']['node']['template']['collection_name'][
'collection_id'] = knowledge_id
if 'index_name' in node['data'].get('node').get('template').keys():
node['data']['node']['template']['index_name'][
'collection_id'] = knowledge_id
trace_id = str(uuid4().hex)
with logger.contextualize(trace_id=trace_id):
await chat_manager.handle_websocket(
Expand Down Expand Up @@ -98,3 +102,9 @@ def solve_response(
message.solved = solved
session.commit()
return {'status_code': 200, 'status_message': 'success'}


@router.post('/comment', status_code=200)
def comment(*, data: ChatInput):
    """Attach the comment carried in ``data`` to the referenced chat message.

    Delegates the write to ``comment_answer`` and replies with the result of
    ``resp_200()`` called without arguments.
    """
    target_id = data.message_id
    remark_text = data.comment
    comment_answer(target_id, remark_text)
    return resp_200()
14 changes: 8 additions & 6 deletions src/backend/bisheng/api/v2/filelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_filelist(
}


@router.post('/chunks', status_code=200)
@router.post('/chunks', response_model=UnifiedResponseModel[KnowledgeFileRead], status_code=200)
async def post_chunks(
*,
knowledge_id: int = Form(...),
Expand All @@ -290,12 +290,14 @@ async def post_chunks(
if not db_knowledge:
raise HTTPException(status_code=500, detail='当前知识库不可用,返回上级目录')

file_knowledge(db_knowledge, file_path, file_name, metadata, session)
db_file = file_knowledge(db_knowledge, file_path, file_name, metadata, session)

return resp_200()
return resp_200(db_file)


@router.post('/chunks_string', status_code=200)
@router.post('/chunks_string',
response_model=UnifiedResponseModel[KnowledgeFileRead],
status_code=200)
async def post_string_chunks(
*,
document: ChunkInput,
Expand All @@ -306,6 +308,6 @@ async def post_string_chunks(
if not db_knowledge:
raise HTTPException(status_code=500, detail='当前知识库不可用,返回上级目录')

text_knowledge(db_knowledge, document.documents, session)
db_file = text_knowledge(db_knowledge, document.documents, session)

return resp_200()
return resp_200(db_file)
6 changes: 3 additions & 3 deletions src/backend/bisheng/initdb_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ knowledges:
# openai_api_version: "" # azure api_version
embedding-host: # 知识库下拉框中显示的embedding模型的名称,可自定义
host_base_url: "" # 在模型管理页面中已上线的embedding服务的地址
model: "" # 在模型管理页面中已上线的embedding模型的名称
model: "" # 在模型管理页面中已上线的embedding模型的名称
vectorstores:
# Milvus 最低要求cpu 4C 8G 推荐4C 16G
Milvus: # 如果需要切换其他vectordb,确保其他服务已经启动,然后配置对应参数
connection_args: {'host': 'milvus', 'port': '19530', 'user': '', 'password': '', 'secure': False}
# partition-key model, 用于分区的字段,如果不配置默认True, 分区后,新的partition不会新建collection,可以通过增加suffix强制增加collection
is_partition: True
is_partition: False
partition_suffix: 1
# 可选配置,有些类型的场景使用ES可以提高召回效果
ElasticKeywordsSearch:
Expand Down Expand Up @@ -53,4 +53,4 @@ use_captcha:

# 聊天对话框配置
dialog_tips:
"欢迎使用bisheng,我们github地址:https://github.com/dataelement/bisheng, 有问题可以提issue,我们会尽快回复您。"
"内容由AI生成,仅供参考!"
2 changes: 1 addition & 1 deletion src/backend/bisheng/interface/initialize/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def instantiate_input_output(node_type, class_object, params, id_dict):
params['chains'] = chain_list
# variable
variable = params.get('variables')
variable_node_id = id_dict.get('variables')
variable_node_id = id_dict.get('variables') or []
params['variables'] = []
for index, id in enumerate(variable_node_id):
params['variables'].append({'node_id': id, 'input': variable[index]})
Expand Down
4 changes: 1 addition & 3 deletions src/backend/bisheng/template/frontend_node/chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ def format_field(field: TemplateField, name: Optional[str] = None) -> None:
if name == 'APIChain' and field.name == 'limit_to_domains':
field.show = True
field.required = True
field.field_type = 'str'
field.is_list = True
field.value = ['']
field.value = None

field.advanced = False
if 'key' in field.name:
Expand Down
3 changes: 1 addition & 2 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ include = ["./bisheng/*", "bisheng/**/*"]
bisheng = "bisheng.__main__:main"

[tool.poetry.dependencies]
bisheng_langchain = "0.2.2.2"
bisheng_langchain = "0.2.2.3"
bisheng_pyautogen = "0.1.19"
minio = "7.2.0"
loguru = "^0.7.1"
Expand Down Expand Up @@ -50,7 +50,6 @@ rich = "^13.4.2"
networkx = "^3.1"
unstructured = "^0.7.0"
pypdf = "^3.11.0"
pypdfium2 = "^4.18.0"
lxml = "^4.9.2"
pysrt = "^1.1.2"
fake-useragent = "^1.1.3"
Expand Down
6 changes: 3 additions & 3 deletions src/backend/test/test_filelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_upload():


def test_file():
url = 'http://192.168.106.116:7860/api/v2/filelib/chunks'
url = 'http://127.0.0.1:7860/api/v2/filelib/chunks'
data = {'knowledge_id': 349, 'metadata': "{\"url\":\"https://baidu.com\"}"}
file = {'file': open('/Users/huangly/Downloads/co2.pdf', 'rb')}

Expand All @@ -33,5 +33,5 @@ def test_upload2():
resp


# test_file()
test_upload()
test_file()
# test_upload()
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ async def _acompletion_with_retry(**kwargs: Any) -> Any:
try:
async with self.client.apost(url=self.host_base_url, json=kwargs) as response:
if response.status != 200:
raise ValueError(f'Error: {response.status}')
raise ValueError(f'Error: {response.status} contet: {response.text}')
async for txt in response.content.iter_any():
if b'\n' in txt:
for txt_ in txt.split(b'\n'):
Expand Down
Loading

0 comments on commit d45934b

Please sign in to comment.