Skip to content

Commit

Permalink
fix: auto-correct inner quotation marks in JSON strings
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Jun 20, 2024
1 parent 625abeb commit 3214246
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
5 changes: 3 additions & 2 deletions app/topic_prompt/topic_prompt_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from topic_prompt.toprompt_llm_prompt import TopromptLLMPrompt, get_output_parser
from topic_prompt.toprompt import Toprompt, TopromptOptions
from topic_prompt.differentiate_writing import repeated_phrase
from util.general import escape_json_inner_quotes

from langchain.prompts import PromptTemplate
from basic_langchain.chat_models import ChatOpenAI
Expand All @@ -32,7 +33,7 @@ def _get_toprompt_options(lang: str, topic: Topic, source: TopicPromptSource, ot
responses += [HumanMessage(content=secondary_prompt.format())]

output_parser = get_output_parser()
parsed_output = output_parser.parse(curr_response.content)
parsed_output = output_parser.parse(escape_json_inner_quotes(curr_response.content))
parsed_output.title = _remove_colon_from_title_with_validation(responses, parsed_output.title)

topic_prompts += [Toprompt(topic, source, parsed_output.why, parsed_output.what, parsed_output.title)]
Expand All @@ -46,7 +47,7 @@ def _get_toprompt_options(lang: str, topic: Topic, source: TopicPromptSource, ot
partial_variables={"phrase": phrase_to_avoid, "format_instructions": get_output_parser().get_format_instructions()})
curr_response = llm([human_message] + responses + [HumanMessage(content=avoid_prompt.format())])
output_parser = get_output_parser()
parsed_output = output_parser.parse(curr_response.content)
parsed_output = output_parser.parse(escape_json_inner_quotes(curr_response.content))
parsed_output.title = _remove_colon_from_title_with_validation(responses + [curr_response], parsed_output.title)
topic_prompts[-1] = Toprompt(topic, source, parsed_output.why, parsed_output.what, parsed_output.title)

Expand Down
17 changes: 17 additions & 0 deletions app/util/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,23 @@
from basic_langchain.schema import SystemMessage, HumanMessage


def escape_json_inner_quotes(json_string):
    """
    Escape unescaped double quotes that appear inside JSON string values.

    Intended for repairing almost-valid JSON (e.g. LLM output) in which a string
    value contains raw double quotes. Quotes that are already backslash-escaped
    are left untouched, so running this on valid JSON, or running it twice, does
    not corrupt the input.

    Assumes the JSON is pretty-formatted: a string value starts right after a
    colon and its closing quote is immediately followed by a comma, a closing
    brace or a newline. Values not laid out this way are returned unchanged.

    Known limitation: an inner quote that is itself directly followed by a comma,
    closing brace or newline is indistinguishable from the closing quote, so the
    value is treated as ending there.

    :param json_string: JSON text whose string values may contain raw double quotes
    :return: the same text with unescaped inner double quotes backslash-escaped
    """
    # group 1: the colon and opening quote; group 2: the value body (lazy).
    # The lookahead locates the closing quote without consuming it.
    pattern = r'(:\s*")(.*?)(?="[,}\n])'

    def escape_unescaped_quote(quote_match):
        # An odd run of backslashes before the quote means the quote is already
        # escaped; an even run (including none) means the backslashes pair up
        # with each other and the quote itself still needs escaping.
        backslashes = quote_match.group(1)
        if len(backslashes) % 2:
            return quote_match.group(0)
        return backslashes + '\\"'

    def escape_quotes(match):
        # Escape only the quotes inside the value body that are not escaped yet
        return match.group(1) + re.sub(r'(\\*)"', escape_unescaped_quote, match.group(2))

    return re.sub(pattern, escape_quotes, json_string)


def get_source_text_with_fallback(source: TopicPromptSource, lang: str, auto_translate=False) -> str:
text = source.text.get(lang, "")
other_lang = "en" if lang == "he" else "he"
Expand Down

0 comments on commit 3214246

Please sign in to comment.