-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
配置修改,代码更新
- Loading branch information
Showing
22 changed files
with
443 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from bisheng_langchain.chains.combine_documents.stuff import StuffDocumentsChain | ||
|
||
__all__ = [ | ||
'StuffDocumentsChain', | ||
] | ||
from .loader_output import LoaderOutputChain | ||
|
||
__all__ = ['StuffDocumentsChain', 'LoaderOutputChain'] | ||
|
68 changes: 68 additions & 0 deletions
68
src/bisheng-langchain/bisheng_langchain/chains/loader_output.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Chain that runs an arbitrary python function.""" | ||
import functools | ||
import logging | ||
import json | ||
from typing import Any, Awaitable, Callable, Dict, List, Optional | ||
|
||
from langchain.callbacks.manager import ( | ||
AsyncCallbackManagerForChainRun, | ||
CallbackManagerForChainRun, | ||
) | ||
from langchain.chains.base import Chain | ||
from langchain.docstore.document import Document | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class LoaderOutputChain(Chain):
    """Chain that returns the text of already-loaded documents.

    The values passed in when the chain is called are not used: every call
    emits the ``page_content`` of ``documents``, joined by blank lines, under
    ``output_key``. The sync and async entry points produce identical output.
    """

    # Documents whose ``page_content`` makes up the chain output.
    documents: List[Document]
    input_key: str = "begin"  #: :meta private:
    output_key: str = "text"  #: :meta private:

    @staticmethod
    @functools.lru_cache
    def _log_once(msg: str) -> None:
        """Log a warning; the cache suppresses repeats of the same message.

        :meta private:
        """
        logger.warning(msg)

    @property
    def input_keys(self) -> List[str]:
        """Expect input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return output keys.

        :meta private:
        """
        return [self.output_key]

    def _render_output(self) -> Dict[str, str]:
        """Build the output mapping shared by ``_call`` and ``_acall``."""
        joined = '\n\n'.join(doc.page_content for doc in self.documents)
        return {self.output_key: joined}

    def _call(
        self,
        inputs: Dict[str, str],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, str]:
        """Return the joined document text; ``inputs`` is not used."""
        return self._render_output()

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Async counterpart of ``_call``.

        Previously this path returned a JSON dump of the content list while
        the sync path returned blank-line-joined text; both now share one
        implementation so callers get the same result either way.
        """
        return self._render_output()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 3 additions & 1 deletion
4
src/bisheng-langchain/bisheng_langchain/document_loaders/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
from .elem_pdf import PDFWithSemanticLoader | ||
|
||
from .elem_unstrcutured_loader import ElemUnstructuredLoader, ElemUnstructuredLoaderV0 | ||
|
||
__all__ = ['PDFWithSemanticLoader', 'ElemUnstructuredLoader', 'ElemUnstructuredLoaderV0'] | ||
__all__ = ['PDFWithSemanticLoader', 'ElemUnstructuredLoader', 'ElemUnstructuredLoaderV0', 'UniversalKVLoader'] | ||
|
4 changes: 4 additions & 0 deletions
4
src/bisheng-langchain/bisheng_langchain/document_loaders/parsers/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
from .image import LayoutParser | ||
from .ocr_client import OCRClient | ||
from .ellm_client import ELLMClient | ||
|
||
__all__ = [ | ||
'LayoutParser', | ||
'OCRClient', | ||
'ELLMClient' | ||
] |
56 changes: 56 additions & 0 deletions
56
src/bisheng-langchain/bisheng_langchain/document_loaders/parsers/ellm_client.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# import base64 | ||
import copy | ||
import base64 | ||
import requests | ||
from typing import Any, Iterator, List, Mapping, Optional, Union | ||
|
||
|
||
class ELLMClient(object):
    """HTTP client that posts a base64 image and an extraction schema to an ELLM endpoint."""

    def __init__(self, api_base_url: Optional[str] = None):
        """Store the endpoint URL and set up the session and request defaults.

        Args:
            api_base_url: URL that ``predict`` posts to.
        """
        self.ep = api_base_url
        self.client = requests.Session()
        # NOTE(review): requests interprets ``timeout`` in seconds, so this is
        # roughly 2.8 hours -- confirm this is intended rather than milliseconds.
        self.timeout = 10000
        # Parameters sent with every request; scene-specific model names and
        # the caller's schema are layered on top of a copy in ``predict``.
        self.params = {
            'sort_filter_boxes': True,
            'enable_huarong_box_adjust': True,
            'support_long_image_segment': True,
            'checkbox': ['std_checkbox'],
            'rotateupright': True,
        }

        # Model names to send for each supported scene.
        self.scene_mapping = {
            'doc': {
                'det': 'general_text_det_mrcnn_v1.0',
                'recog': 'transformer-v2.8-gamma-faster',
                'ellm': 'ELLM',
            },
            'form': {
                'det': 'mrcnn-v5.1',
                'recog': 'transformer-v2.8-gamma-faster',
                'ellm': 'ELLM',
            },
            'hand': {
                'det': 'mrcnn-v5.1',
                'recog': 'transformer-hand-v1.16-faster',
                'ellm': 'ELLM',
            },
        }

    def predict(self, inp):
        """Send one request to the endpoint and return the decoded JSON reply.

        Args:
            inp: Mapping with required keys ``'b64_image'`` and ``'keys'``
                (sent as ``ellm_schema``) and an optional ``'scene'``
                (defaults to ``'form'``). The mapping is read, never modified,
                so the caller can reuse it for another call.

        Returns:
            The JSON body of the response. If the request or the JSON decoding
            fails, a dict with ``'status_code': 400`` and the error text under
            ``'status_message'``.

        Raises:
            KeyError: If ``'b64_image'`` or ``'keys'`` is missing, or if the
                scene is not a key of ``scene_mapping``.
        """
        # Read without ``pop`` so the caller's dict is left intact.
        scene = inp.get('scene', 'form')
        b64_image = inp['b64_image']
        ellm_schema = inp['keys']

        # Deep copy keeps the shared defaults (including the nested list)
        # isolated from per-request changes.
        params = copy.deepcopy(self.params)
        params.update(self.scene_mapping[scene])
        params['ellm_schema'] = ellm_schema

        req_data = {'data': [b64_image], 'param': params}

        try:
            r = self.client.post(url=self.ep,
                                 json=req_data,
                                 timeout=self.timeout)
            return r.json()
        except Exception as e:
            # Deliberate best effort: report the failure in-band instead of
            # raising, matching the error shape callers already receive.
            return {'status_code': 400, 'status_message': str(e)}
Oops, something went wrong.