diff --git a/examples/aflow/benchmark/gsm8k.py b/examples/aflow/benchmark/gsm8k.py index 9f7813f86..1e38fc23d 100644 --- a/examples/aflow/benchmark/gsm8k.py +++ b/examples/aflow/benchmark/gsm8k.py @@ -40,7 +40,7 @@ def calculate_score(self, expected_output: float, prediction: float) -> Tuple[fl async def evaluate_problem(self, problem: dict, graph: Callable) -> Tuple[str, str, float, float, float]: max_retries = 5 retries = 0 - + while retries < max_retries: try: prediction, cost = await graph(problem["question"]) diff --git a/examples/aflow/data/download_data.py b/examples/aflow/data/download_data.py index a2214177f..00aaa7ebc 100644 --- a/examples/aflow/data/download_data.py +++ b/examples/aflow/data/download_data.py @@ -1,9 +1,16 @@ +# -*- coding: utf-8 -*- +# @Date : 2024-10-20 +# @Author : MoshiQAQ & didi +# @Desc : Download and extract dataset files + import os import requests import tarfile from tqdm import tqdm +from typing import List, Dict -def download_file(url, filename): +def download_file(url: str, filename: str) -> None: + """Download a file from the given URL and show progress.""" response = requests.get(url, stream=True) total_size = int(response.headers.get('content-length', 0)) block_size = 1024 @@ -15,22 +22,49 @@ def download_file(url, filename): progress_bar.update(size) progress_bar.close() -def extract_tar_gz(filename, extract_path): +def extract_tar_gz(filename: str, extract_path: str) -> None: + """Extract a tar.gz file to the specified path.""" with tarfile.open(filename, 'r:gz') as tar: tar.extractall(path=extract_path) -url = "https://drive.google.com/uc?export=download&id=1tXp5cLw89egeKRwDuood2TPqoEWd8_C0" -filename = "aflow_data.tar.gz" -extract_path = "./" - -print("Downloading data file...") -download_file(url, filename) - -print("Extracting data file...") -extract_tar_gz(filename, extract_path) +def process_dataset(url: str, filename: str, extract_path: str) -> None: + """Download, extract, and clean up a dataset.""" + 
print(f"Downloading {filename}...") + download_file(url, filename) + + print(f"Extracting {filename}...") + extract_tar_gz(filename, extract_path) + + print(f"{filename} download and extraction completed.") + + os.remove(filename) + print(f"Removed {filename}") -print("Download and extraction completed.") +# Define the datasets to be downloaded +# Users can modify this list to choose which datasets to download +datasets_to_download: List[Dict[str, str]] = [ + { + "name": "datasets", + "url": "https://drive.google.com/uc?export=download&id=1tXp5cLw89egeKRwDuood2TPqoEWd8_C0", + "filename": "aflow_data.tar.gz", + "extract_path": "examples/aflow/data" + }, + { + "name": "results", + "url": "", # Please fill in the correct URL + "filename": "result.tar.gz", + "extract_path": "examples/aflow/data/results" + }, + { + "name": "initial_rounds", + "url": "", # Please fill in the correct URL + "filename": "first_round.tar.gz", + "extract_path": "examples/aflow/scripts/optimized" + } +] -# Clean up the compressed file -os.remove(filename) -print(f"Removed {filename}") +def download(datasets): + """Main function to process all selected datasets.""" + for dataset_name in datasets: + dataset = datasets_to_download[dataset_name] + process_dataset(dataset['url'], dataset['filename'], dataset['extract_path']) \ No newline at end of file diff --git a/examples/aflow/data/human-eval_test.jsonl b/examples/aflow/data/humaneval_test.jsonl similarity index 100% rename from examples/aflow/data/human-eval_test.jsonl rename to examples/aflow/data/humaneval_test.jsonl diff --git a/examples/aflow/data/human-eval_validate.jsonl b/examples/aflow/data/humaneval_validate.jsonl similarity index 100% rename from examples/aflow/data/human-eval_validate.jsonl rename to examples/aflow/data/humaneval_validate.jsonl diff --git a/examples/aflow/scripts/evaluator.py b/examples/aflow/scripts/evaluator.py index 1e746c172..14b068168 100644 --- a/examples/aflow/scripts/evaluator.py +++ 
b/examples/aflow/scripts/evaluator.py @@ -39,25 +39,21 @@ async def graph_evaluate(self, dataset: DatasetType, graph, params: dict, path: data_path = self._get_data_path(dataset, is_test) benchmark_class = self.dataset_configs[dataset] - benchmark = benchmark_class(dataset, data_path, path) + benchmark = benchmark_class(name=dataset, file_path=data_path, log_path=path) # Use params to configure the graph and benchmark - configured_graph = await self._configure_graph(graph, params) + configured_graph = await self._configure_graph(dataset, graph, params) va_list = [1,2,3] # Use va_list from params, or use default value if not provided return await benchmark.run_evaluation(configured_graph, va_list) - async def _configure_graph(self, graph, params: dict): + async def _configure_graph(self, dataset, graph, params: dict): # Here you can configure the graph based on params # For example: set LLM configuration, dataset configuration, etc. dataset_config = params.get("dataset", {}) llm_config = params.get("llm_config", {}) - return graph(name=self.dataset_configs[dataset]["name"], llm_config=llm_config, dataset=dataset_config) + return graph(name=dataset, llm_config=llm_config, dataset=dataset_config) def _get_data_path(self, dataset: DatasetType, test: bool) -> str: base_path = f"examples/aflow/data/{dataset.lower()}" return f"{base_path}_test.jsonl" if test else f"{base_path}_validate.jsonl" - -# Alias methods for backward compatibility -for dataset in ["gsm8k", "math", "humaneval", "mbpp", "hotpotqa", "drop"]: - setattr(Evaluator, f"_{dataset}_eval", lambda self, *args, dataset=dataset.upper(), **kwargs: self.graph_evaluate(dataset, *args, **kwargs)) diff --git a/examples/aflow/scripts/optimizer.py b/examples/aflow/scripts/optimizer.py index d96d1e089..d73be574f 100644 --- a/examples/aflow/scripts/optimizer.py +++ b/examples/aflow/scripts/optimizer.py @@ -82,23 +82,27 @@ def optimize(self, mode: OptimizerType = "Graph"): retry_count = 0 max_retries = 1 - while 
retry_count < max_retries: - try: - score = loop.run_until_complete(self._optimize_graph()) - break - except Exception as e: - retry_count += 1 - logger.info(f"Error occurred: {e}. Retrying... (Attempt {retry_count}/{max_retries})") - if retry_count == max_retries: - logger.info("Max retries reached. Moving to next round.") - score = None - - wait_time = 5 * retry_count - time.sleep(wait_time) - - if retry_count < max_retries: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + + score = loop.run_until_complete(self._optimize_graph()) + + # while retry_count < max_retries: + # try: + # score = loop.run_until_complete(self._optimize_graph()) + # break + # except Exception as e: + # retry_count += 1 + # logger.info(f"Error occurred: {e}. Retrying... (Attempt {retry_count}/{max_retries})") + # if retry_count == max_retries: + # logger.info("Max retries reached. Moving to next round.") + # score = None + + # wait_time = 5 * retry_count + # time.sleep(wait_time) + + # if retry_count < max_retries: + # loop = asyncio.new_event_loop() + # asyncio.set_event_loop(loop) + self.round += 1 logger.info(f"Score for round {self.round}: {score}") @@ -114,7 +118,7 @@ def optimize(self, mode: OptimizerType = "Graph"): time.sleep(5) async def _optimize_graph(self): - validation_n = 5 # validation datasets's execution number + validation_n = 2 # validation datasets's execution number graph_path = f"{self.root_path}/workflows" data = self.data_utils.load_results(graph_path) diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 06186f725..8755b8991 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -622,21 +622,19 @@ async def fill( if self.schema: schema = self.schema - if mode == FillMode.CODE_FILL: + + if mode == FillMode.CODE_FILL.value: result = await self.code_fill(context, function_name, timeout) self.instruct_content = self.create_class()(**result) return self - elif mode == FillMode.CONTEXT_FILL: - """ 
- 使用xml_compile,但是这个版本没有办法实现system message 跟 temperature - """ + elif mode == FillMode.CONTEXT_FILL.value: context = self.xml_compile(context=self.context) result = await self.context_fill(context) self.instruct_content = self.create_class()(**result) return self - elif mode == FillMode.SINGLE_FILL: + elif mode == FillMode.SINGLE_FILL.value: result = await self.single_fill(context) self.instruct_content = self.create_class()(**result) return self diff --git a/optimize.py b/optimize.py index 29bd4a8b3..9b6c81f51 100644 --- a/optimize.py +++ b/optimize.py @@ -4,6 +4,7 @@ # @Desc : Entrance of AFlow. from examples.aflow.scripts.optimizer import Optimizer +from examples.aflow.data.download_data import download from metagpt.configs.models_config import ModelsConfig from typing import Literal @@ -12,10 +13,13 @@ QuestionType = Literal["math", "code", "qa"] OptimizerType = Literal["Graph", "Test"] +# When you first use AFlow, please download the datasets and initial rounds; if you want to take a look at the results, please download the results. +# download(["datasets", "results", "initial_rounds"]) + # Crucial Parameters -dataset: DatasetType = "HotpotQA" # Ensure the type is consistent with DatasetType +dataset: DatasetType = "GSM8K" # Ensure the type is consistent with DatasetType sample: int = 4 # Sample Count, which means how many workflows will be resampled from generated workflows -question_type: QuestionType = "quiz" # Ensure the type is consistent with QuestionType +question_type: QuestionType = "code" # Ensure the type is consistent with QuestionType optimized_path: str = "examples/aflow/scripts/optimized" # Optimized Result Save Path initial_round: int = 1 # Corrected the case from Initial_round to initial_round max_rounds: int = 20 @@ -30,9 +34,9 @@ "Custom", # It's basic unit of a fixed node. optimizer can modify its prompt to get vairous nodes.
# "AnswerGenerate" # It's for qa # "CustomCodeGenerate", # It's for code - # "ScEnsemble", # It's for code, math and qa + "ScEnsemble", # It's for code, math and qa # "Test", # It's for code - # "Programmer", # It's for math + "Programmer", # It's for math ] # Create an optimizer instance @@ -53,4 +57,4 @@ # Optimize workflow via setting the optimizer's mode to 'Graph' optimizer.optimize("Graph") # Test workflow via setting the optimizer's mode to 'Test' - optimizer.optimize("Test") \ No newline at end of file + # optimizer.optimize("Test") \ No newline at end of file