Skip to content

Commit

Permalink
Feat/0.2.2.4 (#305)
Browse files Browse the repository at this point in the history
1. auto gen 支持 azure 部署
2. 支持商汤 sensetime 模型
3. 注册密码特殊字符校验改为字符校验
  • Loading branch information
yaojin3616 committed Jan 29, 2024
2 parents 3e048f7 + cd48726 commit db265dc
Show file tree
Hide file tree
Showing 34 changed files with 840 additions and 251 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ jobs:
${{ env.DOCKERHUB_REPO }}bisheng-frontend:${{ steps.get_version.outputs.VERSION }}
- name: notify feishu
uses: sozo-design/curl@v1.0.2
uses: fjogeleit/http-request-action@v1
with:
args: -X POST -d '{"msg_type":"text","content":{"text":"latest version 编译成功"}}' https://open.feishu.cn/open-apis/bot/v2/hook/2cfe0d8d-647c-4408-9f39-c59134035c4b

url: ${{ secrets.FEISHU_WEBHOOK }}
method: 'POST'
data: '{"msg_type":"text","content":{"text":" ${{ steps.get_version.outputs.VERSION }}发布成功"}}'
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
exclude: ^scripts|docs|docker|requirements|README.md
exclude: ^scripts|docs|docker|requirements|README.md|test
repos:
- repo: https://github.com/PyCQA/flake8.git
rev: 3.8.3
Expand Down
52 changes: 26 additions & 26 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ services:

office:
container_name: bisheng-office
image: onlyoffice/documentserver:7.1.1
image: onlyoffice/documentserver:7.2.1
ports:
- "8701:80"
environment:
Expand Down Expand Up @@ -144,28 +144,28 @@ services:
timeout: 20s
retries: 3

milvus:
container_name: milvus-standalone
image: milvusdb/milvus:v2.3.3
command: ["milvus", "run", "standalone"]
security_opt:
- seccomp:unconfined
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
volumes:
- /etc/localtime:/etc/localtime:ro
- ${DOCKER_VOLUME_DIRECTORY:-.}/data/milvus:/var/lib/milvus
restart: on-failure
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
start_period: 90s
interval: 30s
timeout: 20s
retries: 3
ports:
- "19530:19530"
- "9091:9091"
depends_on:
- etcd
- minio
# milvus:
# container_name: milvus-standalone
# image: milvusdb/milvus:v2.3.3
# command: ["milvus", "run", "standalone"]
# security_opt:
# - seccomp:unconfined
# environment:
# ETCD_ENDPOINTS: etcd:2379
# MINIO_ADDRESS: minio:9000
# volumes:
# - /etc/localtime:/etc/localtime:ro
# - ${DOCKER_VOLUME_DIRECTORY:-.}/data/milvus:/var/lib/milvus
# restart: on-failure
# healthcheck:
# test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
# start_period: 90s
# interval: 30s
# timeout: 20s
# retries: 3
# ports:
# - "19530:19530"
# - "9091:9091"
# depends_on:
# - etcd
# - minio
2 changes: 1 addition & 1 deletion src/backend/bisheng/api/services/chat_imp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from bisheng.api.v1.schemas import ChatMessage
from bisheng.database.base import session_getter
from bisheng.database.models.message import ChatMessage


def comment_answer(message_id: int, comment: str):
Expand Down
38 changes: 38 additions & 0 deletions src/backend/bisheng/api/services/knowledge_imp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import re
import time
from uuid import uuid4

from bisheng.database.base import session_getter
from bisheng.database.models.knowledge import Knowledge, KnowledgeCreate
from bisheng.settings import settings
from fastapi import HTTPException
from sqlmodel import select


def create_knowledge(knowledge: KnowledgeCreate, user_id: int):
    """Create a knowledge base owned by ``user_id`` and persist it.

    Args:
        knowledge: Creation payload. Its ``is_partition`` field is filled in
            from the Milvus settings when it is falsy.
        user_id: Id of the user who will own the new knowledge base.

    Returns:
        A copy of the persisted ``Knowledge`` row, taken while the session
        is still open.

    Raises:
        HTTPException: status 500 when this user already has a knowledge
            base with the same name.
    """
    # Read the Milvus section once; default every level to {} so a missing
    # 'vectorstores' or 'Milvus' key cannot raise AttributeError on None.
    milvus_conf = settings.get_knowledge().get('vectorstores', {}).get('Milvus', {})

    # NOTE(review): because of `or`, an explicit is_partition=False is replaced
    # by the configured default (True when unset) — confirm this is intended.
    knowledge.is_partition = knowledge.is_partition or milvus_conf.get('is_partition', True)
    db_knowldge = Knowledge.model_validate(knowledge)

    with session_getter() as session:
        # Filter on the mapped column Knowledge.user_id. Comparing the request
        # object's attribute (knowledge.user_id) would evaluate to a plain
        # Python bool instead of a SQL predicate, so the duplicate-name check
        # would not be scoped to the owning user.
        know = session.exec(
            select(Knowledge).where(Knowledge.name == knowledge.name,
                                    Knowledge.user_id == user_id)).all()
    if know:
        raise HTTPException(status_code=500, detail='知识库名称重复')

    if not db_knowldge.collection_name:
        if knowledge.is_partition:
            # Partitioned knowledge bases share one collection per embedding
            # model; non-word characters in the model name become '_'.
            embedding = re.sub(r'[^\w]', '_', knowledge.model)
            suffix_id = milvus_conf.get('partition_suffix', 1)
            db_knowldge.collection_name = f'partition_{embedding}_knowledge_{suffix_id}'
        else:
            # Default collection name: timestamp plus a short random suffix.
            db_knowldge.collection_name = f'col_{int(time.time())}_{str(uuid4())[:8]}'
    # NOTE(review): indentation was lost in the reviewed source; index_name is
    # assumed to be assigned unconditionally — confirm against the repository.
    db_knowldge.index_name = f'col_{int(time.time())}_{str(uuid4())[:8]}'
    db_knowldge.user_id = user_id

    with session_getter() as session:
        session.add(db_knowldge)
        session.commit()
        session.refresh(db_knowldge)
        # Copy while the session is still open so the caller gets a populated
        # object that does not depend on the closed session.
        return db_knowldge.copy()
20 changes: 13 additions & 7 deletions src/backend/bisheng/api/v1/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from bisheng.cache.redis import redis_client
from bisheng.cache.utils import save_uploaded_file
from bisheng.chat.utils import judge_source, process_source_document
from bisheng.database.base import get_session
from bisheng.database.base import get_session, session_getter
from bisheng.database.models.config import Config
from bisheng.database.models.flow import Flow
from bisheng.database.models.message import ChatMessage
Expand Down Expand Up @@ -110,7 +110,6 @@ def save_config(data: dict, session: Session = Depends(get_session)):
@router.post('/predict/{flow_id}', response_model=UnifiedResponseModel[ProcessResponse])
@router.post('/process/{flow_id}', response_model=UnifiedResponseModel[ProcessResponse])
async def process_flow(
session: Annotated[Session, Depends(get_session)],
flow_id: str,
inputs: Optional[dict] = None,
tweaks: Optional[dict] = None,
Expand All @@ -124,9 +123,10 @@ async def process_flow(
"""
if inputs and isinstance(inputs, dict) and 'id' in inputs:
inputs.pop('id')

logger.info(f'act=api_call sessionid={session_id} flow_id={flow_id}')
try:
flow = session.get(Flow, flow_id)
with session_getter() as session:
flow = session.get(Flow, flow_id)
if flow is None:
raise ValueError(f'Flow {flow_id} not found')
if flow.data is None:
Expand Down Expand Up @@ -201,10 +201,16 @@ async def process_flow(
session.commit()
session.refresh(message)
extra.update({'source': source, 'message_id': message.id})
task_result.update(extra)
task_result.update({'result': result})
if source != 0:

if source == 1:
await process_source_document(source_documents, session_id, message.id, answer)
elif source == 4:
# QA
extra_qa = json.loads(answer.metadata.get('extra'))
extra_qa.pop('answer', None)
extra.update({'doc': [extra_qa]})
task_result.update(extra)
task_result.update({'answer': result})
except Exception as e:
logger.error(e)

Expand Down
36 changes: 19 additions & 17 deletions src/backend/bisheng/api/v1/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.base import VectorStore
from pymilvus import Collection
from sqlalchemy import func, or_
from sqlmodel import Session, select

Expand Down Expand Up @@ -167,19 +168,18 @@ async def process_knowledge(*,


@router.post('/create', response_model=UnifiedResponseModel[KnowledgeRead], status_code=201)
def create_knowledge(*,
session: Session = Depends(get_session),
knowledge: KnowledgeCreate,
Authorize: AuthJWT = Depends()):
def create_knowledge(*, knowledge: KnowledgeCreate, Authorize: AuthJWT = Depends()):
""" 创建知识库. """
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
""" 创建知识库. """
user_id = payload.get('user_id')
knowledge.is_partition = knowledge.is_partition or settings.get_knowledge().get(
'vectorstores', {}).get('Milvus', {}).get('is_partition', True)
db_knowldge = Knowledge.model_validate(knowledge)
know = session.exec(
select(Knowledge).where(Knowledge.name == knowledge.name,
knowledge.user_id == payload.get('user_id'))).all()
with session_getter() as session:
know = session.exec(
select(Knowledge).where(Knowledge.name == knowledge.name,
knowledge.user_id == user_id)).all()
if know:
raise HTTPException(status_code=500, detail='知识库名称重复')
if not db_knowldge.collection_name:
Expand All @@ -192,11 +192,12 @@ def create_knowledge(*,
# 默认collectionName
db_knowldge.collection_name = f'col_{int(time.time())}_{str(uuid4())[:8]}'
db_knowldge.index_name = f'col_{int(time.time())}_{str(uuid4())[:8]}'
db_knowldge.user_id = payload.get('user_id')
session.add(db_knowldge)
session.commit()
session.refresh(db_knowldge)
return resp_200(db_knowldge)
db_knowldge.user_id = user_id
with session_getter() as session:
session.add(db_knowldge)
session.commit()
session.refresh(db_knowldge)
return resp_200(db_knowldge.copy())


@router.get('/', status_code=200)
Expand Down Expand Up @@ -304,7 +305,7 @@ def delete_knowledge(*,
# 处理vector
embeddings = FakeEmbedding()
vectore_client = decide_vectorstores(knowledge.collection_name, 'Milvus', embeddings)
if vectore_client.col:
if isinstance(vectore_client.col, Collection):
logger.info(f'drop_vectore col={knowledge.collection_name}')
if knowledge.collection_name.startswith('col'):
vectore_client.col.drop()
Expand Down Expand Up @@ -486,6 +487,7 @@ def addEmbedding(collection_name, index_name, knowledge_id: int, model: str, chu
logger.info('process_file_done file_name={} file_id={} time_cost={}',
knowledge_file.file_name, knowledge_file.id,
time.time() - ts1)

except Exception as e:
logger.error('insert_metadata={} ', metadatas, e)
session = next(get_session())
Expand Down Expand Up @@ -592,8 +594,8 @@ def file_knowledge(
status=1,
object_name=metadata_extra.get('url'))
session.add(db_file)
result = db_file.model_dump()
session.flush()
result = db_file.model_dump()

try:
metadata = [{
Expand Down Expand Up @@ -633,8 +635,8 @@ def text_knowledge(
try:
embeddings = decide_embeddings(db_knowledge.model)
vectore_client = decide_vectorstores(db_knowledge.collection_name, 'Milvus', embeddings)
es_client = decide_vectorstores(db_knowledge.index_name, 'ElasticKeywordsSearch',
embeddings)
index_name = db_knowledge.index_name or db_knowledge.collection_name
es_client = decide_vectorstores(index_name, 'ElasticKeywordsSearch', embeddings)
except Exception as e:
logger.exception(e)

Expand Down
25 changes: 4 additions & 21 deletions src/backend/bisheng/api/v2/filelib.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import time
from typing import Optional
from uuid import uuid4

from bisheng.api.services import knowledge_imp
from bisheng.api.v1.knowledge import (addEmbedding, decide_embeddings, decide_vectorstores,
file_knowledge, text_knowledge)
from bisheng.api.v1.schemas import ChunkInput, UnifiedResponseModel, resp_200
Expand All @@ -26,26 +25,10 @@


@router.post('/', response_model=KnowledgeRead, status_code=201)
def create_knowledge(
*,
session: Session = Depends(get_session),
knowledge: KnowledgeCreate,
):
def creat(knowledge: KnowledgeCreate):
"""创建知识库."""
db_knowldge = Knowledge.from_orm(knowledge)
know = session.exec(
select(Knowledge).where(
Knowledge.name == knowledge.name,
knowledge.user_id == settings.get_from_db('default_operator').get('user'))).all()
if know:
raise HTTPException(status_code=500, detail='知识库名称重复')
if not db_knowldge.collection_name:
# 默认collectionName
db_knowldge.collection_name = f'col_{int(time.time())}_{str(uuid4())[:8]}'
db_knowldge.user_id = settings.get_from_db('default_operator').get('user')
session.add(db_knowldge)
session.commit()
session.refresh(db_knowldge)
user_id = knowledge.user_id or settings.get_from_db('default_operator').get('user')
db_knowldge = knowledge_imp.create_knowledge(knowledge, user_id)
return db_knowldge


Expand Down
2 changes: 1 addition & 1 deletion src/backend/bisheng/database/models/knowledge_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class KnowledgeFileBase(SQLModelSerializable):
md5: Optional[str] = Field(index=False)
status: Optional[int] = Field(index=False)
object_name: Optional[str] = Field(index=False)
remark: Optional[str] = Field(sa_column=String(length=512))
remark: Optional[str] = Field(sa_column=Column(String(length=512)))
create_time: Optional[datetime] = Field(
sa_column=Column(DateTime, nullable=False, server_default=text('CURRENT_TIMESTAMP')))
update_time: Optional[datetime] = Field(
Expand Down
2 changes: 2 additions & 0 deletions src/backend/bisheng/default_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ llms:
documentation: ""
CustomLLMChat:
documentation: ""
SenseChat:
documentation: ""
###
# There's a bug in this component deactivating until we get it sorted: _language_models.py", line 804, in send_message
# is_blocked=safety_attributes.get("blocked", False),
Expand Down
2 changes: 1 addition & 1 deletion src/backend/bisheng/initdb_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ knowledges:
# openai_api_version: "" # azure api_version
embedding-host: # 知识库下拉框中显示的embedding模型的名称,可自定义
host_base_url: "" # 在模型管理页面中已上线的embedding服务的地址
model: "" # 在模型管理页面中已上线的embedding模型的名称
model: "" # 在模型管理页面中已上线的embedding模型的名称
vectorstores:
# Milvus 最低要求cpu 4C 8G 推荐4C 16G
Milvus: # 如果需要切换其他vectordb,确保其他服务已经启动,然后配置对应参数
Expand Down
2 changes: 1 addition & 1 deletion src/backend/bisheng/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def set_redis_url(cls, values):
if match:
password = match.group(0)
new_password = decrypt_token(password)
new_redis_url = re.sub(pattern, f':{new_password}@', values['redis_url'])
new_redis_url = re.sub(pattern, f'{new_password}', values['redis_url'])
values['redis_url'] = new_redis_url
return values

Expand Down
Loading

0 comments on commit db265dc

Please sign in to comment.