
Commit

update ci (#37)
yaojin3616 committed Sep 18, 2023
2 parents fd24747 + 2f89ef3 commit 0291a82
Showing 2 changed files with 44 additions and 15 deletions.
12 changes: 7 additions & 5 deletions .github/workflows/release.yml
@@ -9,6 +9,8 @@ on:

env:
DOCKERHUB_REPO: dataelement/
+ PY_NEXUS: http://110.16.193.170:50083
+ DOCKER_NEXUS: http://110.16.193.170:50080

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -41,13 +43,13 @@ jobs:
pip install twine
cd ./src/bisheng-langchain
python setup.py bdist_wheel
repo="http://110.16.193.170:50083/repository/pypi-hosted/"
repo="${{ env.PY_NEXUS }}/repository/pypi-hosted/"
twine upload --verbose -u ${{ secrets.NEXUS_USER }} -p ${{ secrets.NEXUS_PASSWORD }} --repository-url $repo dist/*.whl
# Publish to the private registry
- name: set insecure registry
run: |
echo "{ \"insecure-registries\": [\"http://110.16.193.170:50083\"] }" | sudo tee /etc/docker/daemon.json
echo "{ \"insecure-registries\": [\"${{ env.DOCKER_NEXUS }}\"] }" | sudo tee /etc/docker/daemon.json
sudo service docker restart
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
@@ -62,9 +64,9 @@ jobs:
- name: Login Nexus Container Registry
uses: docker/login-action@v2
with:
- registry: http://110.16.193.170:50083
- username: ${{ secrets.NEXUS_PUBLIC }}
- password: ${{ secrets.NEXUS_PUBLIC_PASSWORD }}
+ registry: ${{ env.DOCKER_NEXUS }}
+ username: ${{ secrets.NEXUS_USER }}
+ password: ${{ secrets.NEXUS_PASSWORD }}
# Build the backend and push it to Docker Hub
- name: Build backend and push
id: docker_build_backend
47 changes: 37 additions & 10 deletions src/backend/bisheng/api/v1/knowledge.py
@@ -54,7 +54,10 @@ async def get_embedding():


@router.post('/process', status_code=201)
- async def process_knowledge(*, session: Session = Depends(get_session), data: dict, Authorize: AuthJWT = Depends()):
+ async def process_knowledge(*,
+ session: Session = Depends(get_session),
+ data: dict,
+ Authorize: AuthJWT = Depends()):
"""上传文件到知识库.
使用flowchain来处理embeding的流程
"""
@@ -108,7 +111,8 @@ def create_knowledge(*,
"""创建知识库."""
db_knowldge = Knowledge.from_orm(knowledge)
know = session.exec(
- select(Knowledge).where(Knowledge.name == knowledge.name, knowledge.user_id == payload.get('user_id'))).all()
+ select(Knowledge).where(Knowledge.name == knowledge.name,
+ knowledge.user_id == payload.get('user_id'))).all()
if know:
raise HTTPException(status_code=500, detail='Duplicate knowledge base name')
if not db_knowldge.collection_name:
@@ -158,18 +162,30 @@ def get_knowledge(*,


@router.get('/file_list/{knowledge_id}', status_code=200)
- def get_filelist(*, session: Session = Depends(get_session), knowledge_id: int, page_size: int = 10, page_num: int = 1):
+ def get_filelist(*,
+ session: Session = Depends(get_session),
+ knowledge_id: int,
+ page_size: int = 10,
+ page_num: int = 1):
""" 获取知识库文件信息. """
# 查找上传的文件信息
- total_count = session.scalar(select(func.count(KnowledgeFile.id)).where(KnowledgeFile.knowledge_id == knowledge_id))
+ total_count = session.scalar(
+ select(func.count(KnowledgeFile.id)).where(KnowledgeFile.knowledge_id == knowledge_id))
files = session.exec(
select(KnowledgeFile).where(KnowledgeFile.knowledge_id == knowledge_id).order_by(
- KnowledgeFile.update_time.desc()).offset(page_size * (page_num - 1)).limit(page_size)).all()
- return {'data': [jsonable_encoder(knowledgefile) for knowledgefile in files], 'total': total_count}
+ KnowledgeFile.update_time.desc()).offset(page_size *
+ (page_num - 1)).limit(page_size)).all()
+ return {
+ 'data': [jsonable_encoder(knowledgefile) for knowledgefile in files],
+ 'total': total_count
+ }


@router.delete('/{knowledge_id}', status_code=200)
- def delete_knowledge(*, session: Session = Depends(get_session), knowledge_id: int, Authorize: AuthJWT = Depends()):
+ def delete_knowledge(*,
+ session: Session = Depends(get_session),
+ knowledge_id: int,
+ Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
""" 删除知识库信息. """
@@ -184,7 +200,10 @@ def delete_knowledge(*, session: Session = Depends(get_session), knowledge_id: i


@router.delete('/file/{file_id}', status_code=200)
- def delete_knowledge_file(*, session: Session = Depends(get_session), file_id: int, Authorize: AuthJWT = Depends()):
+ def delete_knowledge_file(*,
+ session: Session = Depends(get_session),
+ file_id: int,
+ Authorize: AuthJWT = Depends()):
Authorize.jwt_required()
payload = json.loads(Authorize.get_jwt_subject())
""" 删除知识文件信息 """
@@ -231,6 +250,8 @@ async def addEmbedding(collection_name, model: str, chunk_size: int, file_paths:

embeddings = decide_embeddings(model)
vectore_client = decide_vectorstores(collection_name, embeddings)
+ # es_param = {'index_name': }
+ # es_client = import_vectorstore("ElasticKeywordsSearch")
for index, path in enumerate(file_paths):
knowledge_file = knowledge_files[index]
try:
@@ -253,9 +274,15 @@ async def addEmbedding(collection_name, model: str, chunk_size: int, file_paths:


def _read_chunk_text(input_file, file_name, size):
- from langchain.document_loaders import (PyPDFLoader, BSHTMLLoader, TextLoader, UnstructuredMarkdownLoader)
+ from langchain.document_loaders import (PyPDFLoader, BSHTMLLoader, TextLoader,
+ UnstructuredMarkdownLoader)
from langchain.text_splitter import CharacterTextSplitter
- filetype_load_map = {'txt': TextLoader, 'pdf': PyPDFLoader, 'html': BSHTMLLoader, 'md': UnstructuredMarkdownLoader}
+ filetype_load_map = {
+ 'txt': TextLoader,
+ 'pdf': PyPDFLoader,
+ 'html': BSHTMLLoader,
+ 'md': UnstructuredMarkdownLoader
+ }

file_type = file_name.split('.')[-1]
if file_type not in filetype_load_map:
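The last hunk only re-wraps the imports and the loader map, but it hints at how _read_chunk_text works. Below is a minimal sketch, assuming the usual langchain pattern of loading the file with the mapped loader and splitting it with CharacterTextSplitter; the function name, chunk overlap, and error handling are illustrative assumptions, not the exact bisheng implementation.

from langchain.document_loaders import (PyPDFLoader, BSHTMLLoader, TextLoader,
                                         UnstructuredMarkdownLoader)
from langchain.text_splitter import CharacterTextSplitter

filetype_load_map = {
    'txt': TextLoader,
    'pdf': PyPDFLoader,
    'html': BSHTMLLoader,
    'md': UnstructuredMarkdownLoader
}


def read_chunk_text_sketch(input_file: str, file_name: str, size: int) -> list:
    """Load a file with the loader matching its extension and split it into chunks."""
    file_type = file_name.split('.')[-1]
    if file_type not in filetype_load_map:
        raise ValueError(f'unsupported file type: {file_type}')
    loader = filetype_load_map[file_type](input_file)  # every mapped loader accepts a file path
    documents = loader.load()                          # -> list of langchain Documents
    splitter = CharacterTextSplitter(chunk_size=size, chunk_overlap=0)  # overlap value is an assumption
    chunks = splitter.split_documents(documents)
    return [doc.page_content for doc in chunks]

Presumably the chunks produced here are what addEmbedding then embeds and writes to the vector store.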
