diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index b71def136..6da974d96 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -119,15 +119,22 @@ def repair_json_format(output: str) -> str: logger.info(f"repair_json_format: {'}]'}") elif output.startswith("{") and output.endswith("]"): output = output[:-1] + "}" - - # remove `#` in output json str, usually appeared in `glm-4` + # remove comments in output json string, after json value content, maybe start with #, maybe start with // arr = output.split("\n") new_arr = [] - for line in arr: - idx = line.find("#") - if idx >= 0: - line = line[:idx] - new_arr.append(line) + for json_line in arr: + # look for # or // comments and make sure they are not inside the string value + comment_index = -1 + for match in re.finditer(r"(\".*?\"|\'.*?\')|(#|//)", json_line): + if match.group(1): # if the string value + continue + if match.group(2): # if comments + comment_index = match.start(2) + break + # if comments, then delete them + if comment_index != -1: + json_line = json_line[:comment_index].rstrip() + new_arr.append(json_line) output = "\n".join(new_arr) return output diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index 1f809a081..9d53b8243 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -141,6 +141,32 @@ def test_repair_json_format(): output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) assert output == target_output + raw_output = """ +{ + "Language": "en_us", // define language + "Programming Language": "Python" # define code language +} +""" + target_output = """{ + "Language": "en_us", + "Programming Language": "Python" +}""" + output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) + assert output == target_output + + raw_output = """ + { + "Language": "#en_us#", // define language + "Programming Language": "//Python # Code // Language//" # define code language + } + """ + target_output = """{ + "Language": "#en_us#", + "Programming Language": "//Python # Code // Language//" + }""" + output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON) + assert output == target_output + def test_repair_invalid_json(): from metagpt.utils.repair_llm_raw_output import repair_invalid_json