update bisheng_langchain && deploy (#14)
yaojin3616 committed Sep 11, 2023
2 parents 4fe64b5 + c6676e5 commit 7efe773
Showing 76 changed files with 2,076 additions and 1,982 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
@@ -70,3 +70,18 @@ jobs:
tags: |
${{ env.DOCKERHUB_REPO }}bisheng-frontend:latest
${{ env.DOCKERHUB_REPO }}bisheng-frontend:${{ steps.get_version.outputs.VERSION }}
# Build bisheng_langchain
- name: Set python version 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8

- name: Build PyPi bisheng-langchain and push
id: pypi_build_bisheng_langchain
run: |
pip install Cython
pip install wheel
pip install twine
cd ./src/bisheng_langchain
python setup.py bdist_wheel
twine upload dist/* -u ${{ secrets.PYPI_USER }} -p ${{ secrets.PYPI_PASSWORD }} --repository pypi
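The new CI step above runs python setup.py bdist_wheel inside src/bisheng_langchain and uploads the resulting wheel with twine, so it assumes a setuptools-based setup.py exists there. A minimal sketch of such a file, with placeholder metadata rather than the repository's actual values:

# Hypothetical minimal setup.py, for illustration only; the real bisheng_langchain
# metadata lives in the repository. `python setup.py bdist_wheel` builds a wheel
# into dist/, which twine then uploads.
from setuptools import find_packages, setup

setup(
    name='bisheng-langchain',        # assumed distribution name
    version='0.0.1',                 # placeholder version
    packages=find_packages(),
    install_requires=['langchain'],  # assumed dependency
)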
80 changes: 80 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,80 @@

name: release

on:
push:
# Sequence of patterns matched against refs/tags
branches:
- "release"

env:
DOCKERHUB_REPO: dataelement/

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest
#if: startsWith(github.event.ref, 'refs/tags')
steps:
- name: checkout
uses: actions/checkout@v2

- name: Get version
id: get_version
run: |
echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//}
# Build backend and push to Docker Hub
- name: Build backend and push
id: docker_build_backend
uses: docker/build-push-action@v2
with:
# build context directory for the backend
context: "./src/backend/"
# whether to run docker push
push: false
# docker build args: inject APP_NAME / APP_VERSION
build-args: |
APP_NAME="bisheng-backend"
APP_VERSION=${{ steps.get_version.outputs.VERSION }}
# generate two docker tags: ${APP_VERSION} and latest
tags: |
${{ env.DOCKERHUB_REPO }}bisheng-backend:latest
${{ env.DOCKERHUB_REPO }}bisheng-backend:${{ steps.get_version.outputs.VERSION }}
# Build the frontend Docker image and push to Docker Hub
- name: Build frontend and push
id: docker_build_frontend
uses: docker/build-push-action@v2
with:
# build context directory for the frontend
context: "./src/frontend/"
# whether to run docker push
push: false
# docker build args: inject APP_NAME / APP_VERSION
build-args: |
APP_NAME="bisheng-frontend"
APP_VERSION=${{ steps.get_version.outputs.VERSION }}
# generate two docker tags: ${APP_VERSION} and latest
tags: |
${{ env.DOCKERHUB_REPO }}bisheng-frontend:latest
${{ env.DOCKERHUB_REPO }}bisheng-frontend:${{ steps.get_version.outputs.VERSION }}
# Build bisheng-langchain
- name: Set python version 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8

- name: Build PyPi bisheng-langchain and push
id: pypi_build_bisheng_langchain
run: |
pip install Cython
pip install wheel
pip install twine
cd ./src/bisheng-langchain
python setup.py bdist_wheel
repo="http://110.16.193.170:50083/repository/pypi-hosted/"
twine upload --verbose -u ${{ secrets.NEXUS_USER }} -p ${{ secrets.NEXUS_PASSWORD }} --repository-url $repo dist/*.whl
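The Get version step uses the shell substitution ${GITHUB_REF/refs\/tags\//} to turn a tag ref into a version string. A small Python equivalent, for illustration only (the default ref value is a placeholder):

# Python equivalent of the shell substitution used in the Get version step.
# The default ref below is a placeholder, not taken from an actual workflow run.
import os

ref = os.environ.get('GITHUB_REF', 'refs/tags/v0.0.2')
version = ref.replace('refs/tags/', '', 1)  # e.g. 'v0.0.2'
print(version)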
14 changes: 8 additions & 6 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
rev: 3.8.3
hooks:
- id: flake8
args: ["--max-line-length=240"]
args: ["--max-line-length=120"]
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
@@ -14,11 +14,13 @@
hooks:
- id: isort
files: \.(py|pyd)$
# - repo: https://github.com/pre-commit/mirrors-yapf
# rev: v0.32.0
# hooks:
# - id: yapf
# files: \.(py|pyd)$
args: ["-l 100"]
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
hooks:
- id: yapf
files: \.(py|pyd)$
args: ["--style={column_limit: 120}"]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.1.0
hooks:
22 changes: 13 additions & 9 deletions docker/bisheng/config/config.yaml
@@ -1,17 +1,21 @@
# Database configuration
database_url:
"mysql+pymysql://username:password@192.168.106.106:3306/bisheng"
"mysql+pymysql://root:1234@mysql:3306/bisheng"
redis_url:
"192.168.106.116:6379"
"redis:6379"

# Embedding model configuration for the knowledge base
embedding_config:
text-embedding-ada-002:
base_url:
""
multilingual-e5-large:
base_url:
""
knowledges:
embeddings:
text-embedding-ada-002:
base_url: ""
openai_proxy: ""
openai_api_key: ""
vectorstores:
Chroma:
persist_directory: "/app/data/chroma_persist" # In-memory chroma with saving/loading to disk
# Milvus: # to switch to another vector DB, make sure its service is already running, then set the corresponding parameters
# connection_args = {'host': '127.0.0.1', 'port': '19530', 'user': '', 'password': '', 'secure': False}

agents:
ZeroShotAgent:
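The restructured knowledges block is what the backend's new helpers read: the keys under embeddings choose the embedding model, and the first entry under vectorstores chooses the vector store (see the knowledge.py changes below). A minimal sketch of loading this section with PyYAML, assuming a config.yaml with the layout shown above:

# Sketch only: read the knowledges section the way the backend's decide_* helpers
# consume it. Assumes PyYAML is installed and the file layout matches the diff above.
import yaml

with open('docker/bisheng/config/config.yaml') as f:
    cfg = yaml.safe_load(f)

embeddings_cfg = cfg['knowledges']['embeddings']      # e.g. {'text-embedding-ada-002': {...}}
vectorstores_cfg = cfg['knowledges']['vectorstores']  # e.g. {'Chroma': {'persist_directory': ...}}

store_name = next(iter(vectorstores_cfg))             # first configured store, as in decide_vectorstores
print(store_name, vectorstores_cfg[store_name])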
13 changes: 7 additions & 6 deletions docker/docker-compose.yml
@@ -14,7 +14,7 @@ services:
mysql:
image: mysql:5.7.40
environment:
- "MYSQL_ROOT_PASSWORD=E1SkG0PaDMEPTAxY"
- "MYSQL_ROOT_PASSWORD=1234" # 数据库密码,建议修改,如果修改需要同步修改bisheng/congfig/config.yaml配置
- "MYSQL_DATABASE=bisheng"
- "TZ=Asia/Shanghai"
ports:
@@ -24,18 +24,19 @@
- ${DOCKER_VOLUME_DIRECTORY:-.}/mysql/data:/var/lib/mysql

backend:
image: dataelement/bisheng-backend:0.0.1
image: dataelement/bisheng-backend:latest
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/bisheng/config/config.yaml:/app/bisheng/config.yaml
- ${DOCKER_VOLUME_DIRECTORY:-.}/bisheng/data/:/app/data/
ports:
- "7861:7860"
command: bash -c "uvicorn bisheng.main:app --host 0.0.0.0 --port 7860 --workers 2" # --workers sets the number of worker processes to increase concurrency
restart: on-failure

nginx:
image: dataelement/bisheng-frontend:0.0.1
image: dataelement/bisheng-frontend:latest
ports:
- 3001:3001
- "3001:3001"
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/nginx/nginx.conf:/etc/nginx/nginx.conf
- ${DOCKER_VOLUME_DIRECTORY:-.}/nginx/conf.d:/etc/nginx/conf.d
- ${DOCKER_VOLUME_DIRECTORY:-.}/nginx/html:/usr/share/nginx/html
- ${DOCKER_VOLUME_DIRECTORY:-.}/nginx/conf.d:/etc/nginx/conf.d
1 change: 1 addition & 0 deletions docker/nginx/conf.d/default.conf
@@ -33,6 +33,7 @@ server {
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
client_max_body_size 50m;
}

}
4 changes: 1 addition & 3 deletions src/backend/Dockerfile
@@ -14,9 +14,7 @@ COPY poetry.lock pyproject.toml ./
COPY ./ ./

RUN python -m pip install --upgrade pip && \
pip install gunicorn && \
pip install shapely==2.0.1 && \
pip install langchain_contrib==0.0.1
pip install shapely==2.0.1

# Install dependencies
RUN poetry config virtualenvs.create false
62 changes: 43 additions & 19 deletions src/backend/bisheng/api/v1/knowledge.py
@@ -7,19 +7,21 @@
from bisheng.api.v1.schemas import UploadFileResponse
from bisheng.cache.utils import save_uploaded_file
from bisheng.database.base import get_session
from bisheng.database.models.knowledge import (Knowledge, KnowledgeCreate,
KnowledgeRead)
from bisheng.database.models.knowledge_file import (KnowledgeFile,
KnowledgeFileRead)
from bisheng.database.models.knowledge import Knowledge, KnowledgeCreate, KnowledgeRead
from bisheng.database.models.knowledge_file import KnowledgeFile, KnowledgeFileRead
from bisheng.database.models.user import User
from bisheng.interface.embeddings.custom import OpenAIProxyEmbedding
from bisheng.interface.importing.utils import import_vectorstore
from bisheng.interface.initialize.loading import instantiate_vectorstore
from bisheng.settings import settings
from bisheng.utils.logger import logger
from bisheng_langchain.embeddings.host_embedding import HostEmbeddings
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from fastapi.encoders import jsonable_encoder
from fastapi_jwt_auth import AuthJWT
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Milvus
from langchain.vectorstores.base import VectorStore
from sqlmodel import Session, select

# build router
@@ -83,7 +85,10 @@ async def process_knowledge(*, session: Session = Depends(get_session), data: di
file_paths.append(filepath)
logger.info(f'fileName={file_name} col={collection_name}')
asyncio.create_task(
addEmbedding(collection_name=collection_name, chunk_size=chunck_size, file_paths=file_paths, knowledge_files=files))
addEmbedding(collection_name=collection_name,
chunk_size=chunck_size,
file_paths=file_paths,
knowledge_files=files))

knowledge.update_time = db_file.create_time
session.add(knowledge)
Expand All @@ -92,7 +97,10 @@ async def process_knowledge(*, session: Session = Depends(get_session), data: di


@router.post('/create', response_model=KnowledgeRead, status_code=201)
def create_knowledge(*, session: Session = Depends(get_session), knowledge: KnowledgeCreate, Authorize: AuthJWT = Depends()):
def create_knowledge(*,
session: Session = Depends(get_session),
knowledge: KnowledgeCreate,
Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
"""创建知识库."""
@@ -175,32 +183,48 @@ def delete_knowledge_file(*, session: Session = Depends(get_session), file_id: i
raise HTTPException(status_code=404, detail='没有权限执行操作')
knowledge = session.get(Knowledge, knowledge_file.knowledge_id)
# handle the vector DB

collection_name = knowledge.collection_name
embeddings = OpenAIEmbeddings()
milvus = Milvus(embedding_function=embeddings, collection_name=collection_name, connection_args=connection_args)
pk = milvus.col.query(expr=f'file_id == {file_id}', output_fields=['pk'])
res = milvus.col.delete(f"pk in {[p['pk'] for p in pk]}")
embeddings = decide_embeddings(knowledge.model)
vectore_client = decide_vectorstores(collection_name, embeddings)
if isinstance(vectore_client, Milvus):
pk = vectore_client.col.query(expr=f'file_id == {file_id}', output_fields=['pk'])
res = vectore_client.col.delete(f"pk in {[p['pk'] for p in pk]}")

logger.info(f'act=delete_vector file_id={file_id} res={res}')
session.delete(knowledge_file)
session.commit()
return {'message': 'knowledge file deleted successfully'}


connection_args = {'host': '192.168.106.116', 'port': '19530', 'user': '', 'password': '', 'secure': False}
def decide_embeddings(model: str) -> Embeddings:
model_list = settings.knowledges.get('embeddings')
if model == 'text-embedding-ada-002':
return OpenAIEmbeddings(**model_list.get('model'))
else:
return HostEmbeddings(**model_list.get('model'))


def decide_vectorstores(collection_name: str, embedding: Embeddings) -> VectorStore:
param = {'collection_name': collection_name, 'embedding_function': embedding}
vector_store = list(settings.knowledges.get('vectorstores').keys())[0]
vector_config = settings.knowledges.get('vectorstores').get(vector_store)
param.update(vector_config)
class_obj = import_vectorstore(vector_store)
return instantiate_vectorstore(class_object=class_obj, params=param)


async def addEmbedding(collection_name, model: str, chunk_size: int, file_paths: List[str],
knowledge_files: List[KnowledgeFile]):

async def addEmbedding(collection_name, chunk_size: int, file_paths: List[str], knowledge_files: List[KnowledgeFile]):
embeddings = OpenAIProxyEmbedding()
embeddings = decide_embeddings(model)
vectore_client = decide_vectorstores(collection_name, embeddings)
for index, path in enumerate(file_paths):
knowledge_file = knowledge_files[index]
try:
texts, metadatas = _read_chunk_text(path, knowledge_file.file_name, chunk_size)
[metadata.update({'file_id': knowledge_file.id}) for metadata in metadatas]
Milvus.from_texts(texts=texts,
embedding=embeddings,
metadatas=metadatas,
collection_name=collection_name,
connection_args=connection_args)
vectore_client.add_texts(texts=texts, metadatas=metadatas)

session = next(get_session())
db_file = session.get(KnowledgeFile, knowledge_file.id)
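Taken together, the knowledge.py changes replace the hard-coded OpenAIEmbeddings + Milvus pair with config-driven helpers: decide_embeddings picks the embedding backend by model name, and decide_vectorstores instantiates whichever vector store is configured. A hypothetical standalone usage, assuming the bisheng backend package is installed and settings.knowledges is populated as in the config.yaml above:

# Hypothetical usage of the helpers introduced in this commit; it mirrors what the
# new addEmbedding does (decide embeddings, decide vector store, then add_texts).
from bisheng.api.v1.knowledge import decide_embeddings, decide_vectorstores

embeddings = decide_embeddings('text-embedding-ada-002')    # model name from the config above
store = decide_vectorstores('demo_collection', embeddings)  # collection name is a placeholder

store.add_texts(
    texts=['hello bisheng'],
    metadatas=[{'file_id': 1}],  # addEmbedding tags each chunk with its knowledge file id
)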