diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f8b87da36..6f8d35699 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,7 +38,7 @@ exclude: | (?x)^( docs/.*| tests/.*| - demos/.*| + demos/(?!api_service/).*| tools/mm_eval/inception_metrics/.*| thirdparty/easy_animate/.*| .*\.md diff --git a/demos/api_service/configs/model_configs.json b/demos/api_service/configs/model_configs.json index ae15b2419..0755be0ed 100644 --- a/demos/api_service/configs/model_configs.json +++ b/demos/api_service/configs/model_configs.json @@ -18,4 +18,4 @@ "temperature": 0.0 } } -] \ No newline at end of file +] diff --git a/demos/api_service/utils.py b/demos/api_service/utils.py index 71f63052e..fc039871e 100644 --- a/demos/api_service/utils.py +++ b/demos/api_service/utils.py @@ -1,25 +1,27 @@ import datetime -import os -import yaml -import requests -from json import loads as jloads, dumps as jdumps -import importlib import glob +import importlib +import os +from json import dumps as jdumps +from json import loads as jloads from typing import Dict, Optional from urllib.parse import urljoin -from PIL import Image -from loguru import logger - -from agentscope.service import ServiceResponse, ServiceToolkit +import requests +import yaml from agentscope.message import Msg +from agentscope.service import ServiceToolkit +from loguru import logger +from PIL import Image DJ_BASE_URL = 'http://localhost:8000' DJ_CONFIG_TEMPLATE = './configs/dj_config_template.yaml' DJ_OUTPUT = 'outputs' -def call_data_juicer_api(path: str, params: Optional[Dict] = None, json: Optional[Dict] = None): +def call_data_juicer_api(path: str, + params: Optional[Dict] = None, + json: Optional[Dict] = None): url = urljoin(DJ_BASE_URL, path) if json is not None: @@ -44,11 +46,12 @@ def init_config(dataset_path: str, op_name: str, **op_args): dj_config = yaml.safe_load(fin) dj_config['dataset_path'] = dataset_path timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - dj_config['export_path'] = os.path.join(DJ_OUTPUT, timestamp, 'processed_data.jsonl') + dj_config['export_path'] = os.path.join(DJ_OUTPUT, timestamp, + 'processed_data.jsonl') dj_config['process'].append({op_name: op_args}) url_path = '/data_juicer/config/get_init_configs' try: - res = call_data_juicer_api(url_path, params={"cfg": jdumps(dj_config)}) + res = call_data_juicer_api(url_path, params={'cfg': jdumps(dj_config)}) except Exception as e: error_msg = f'An unexpected error occurred in calling {url_path}:\n{e}' raise RuntimeError(error_msg) @@ -62,17 +65,21 @@ def execute_analyzer(dj_config: dict): Args: dj_config: configs of data-juicer """ - logger.chat(Msg(name="system", content="Analyzing data...", role="system")) + logger.chat(Msg(name='system', content='Analyzing data...', role='system')) try: - res = call_data_juicer_api('/data_juicer/core/Analyzer/run', {"skip_return": True}, json={"cfg": jdumps(dj_config)}) + res = call_data_juicer_api('/data_juicer/core/Analyzer/run', + {'skip_return': True}, + json={'cfg': jdumps(dj_config)}) assert res['status'] == 'success' return dj_config['export_path'] except Exception as e: error_msg = f'An unexpected error occurred in Data-Juicer: {e}' raise RuntimeError(error_msg) - -def show_analyzed_results(analyzed_result_path: str, require_min=True, require_max=True): + +def show_analyzed_results(analyzed_result_path: str, + require_min=True, + require_max=True): """ Show the analyzed results to the users and get the specified thresholds. @@ -82,34 +89,44 @@ def show_analyzed_results(analyzed_result_path: str, require_min=True, require_m """ if os.path.isfile(analyzed_result_path): - analyzed_result_path = os.path.join(os.path.dirname(analyzed_result_path), 'analysis') + analyzed_result_path = os.path.join( + os.path.dirname(analyzed_result_path), 'analysis') - hist_file = max(glob.glob(os.path.join(analyzed_result_path, '*hist.png')), key=os.path.getctime, default=None) + hist_file = max(glob.glob(os.path.join(analyzed_result_path, '*hist.png')), + key=os.path.getctime, + default=None) if hist_file is not None: img = Image.open(hist_file) img.show() min_threshold, max_threshold = 0, 0 if require_min: - min_threshold = float(input("Based on above analyzed results, enter the minimum threshold value for filter: ")) + min_threshold = float( + input('Based on above analyzed results, ' + 'enter the minimum threshold value for filter: ')) if require_max: - max_threshold = float(input("Based on above analyzed results, enter the maximum threshold value for filter: ")) + max_threshold = float( + input('Based on above analyzed results, ' + 'enter the maximum threshold value for filter: ')) return min_threshold, max_threshold else: - error_msg = f'Error in showing analyzed results: {analyzed_result_path}' + error_msg = f'Error in showing analyzed result: {analyzed_result_path}' raise RuntimeError(error_msg) -def execute_config(dj_config: dict): +def execute_config(dj_config: Dict): """ Execute data-juicer data process. Args: dj_config: configs of data-juicer """ - logger.chat(Msg(name="system", content="Processing data...", role="system")) + logger.chat(Msg(name='system', content='Processing data...', + role='system')) try: - res = call_data_juicer_api('/data_juicer/core/Executor/run', params={"skip_return": True}, json={"cfg": jdumps(dj_config)}) + res = call_data_juicer_api('/data_juicer/core/Executor/run', + params={'skip_return': True}, + json={'cfg': jdumps(dj_config)}) assert res['status'] == 'success' return dj_config['export_path'] except Exception as e: