Skip to content

Commit

Permalink
feat: style guide works. yay.
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Jul 16, 2024
1 parent 05ca7eb commit 6a5aa7a
Showing 1 changed file with 76 additions and 6 deletions.
82 changes: 76 additions & 6 deletions app/topic_prompt/style_guide.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
Make prompts conform to the style guide
"""
import csv
from typing import Optional
from basic_langchain.schema import SystemMessage, HumanMessage
from basic_langchain.chat_models import ChatOpenAI
from util.general import get_by_xml_tag


class StyleGuide:
Expand All @@ -11,14 +15,80 @@ def __init__(self):
self._rules = self._read_style_guide_file()

def _read_style_guide_file(self):
rules = []
with open(self.STYLE_GUIDE_FILE, "r") as fin:
cin = csv.reader(fin)
for row in list(cin)[4:]:
rules.append(row[0].strip())
rules = [
{"title": "Bereshit Rabbah", "gloss": "a talmudic-era midrashic work on the book of Genesis"},
{"title": "Midrash Tanchuma", "gloss": "an early medieval midrash collection"},
{"title": "Mekhilta DeRabbi Yishmael", "gloss": "The Mekhilta DeRabbi Yishmael, an ancient midrash from the land of Israel on the book of Exodus, discusses the significance of matzoh and maror remnants during the Passover Seder."},
{"title": "Mishnah", "gloss": "The Mishnah, the first codification of Jewish law from the early third-century land of Israel, in tractate Chullin, discusses which sacrifices require the correct intention to fulfill their purpose."},
{"title": "Tosefta", "gloss": "The Tosefta, an ancient collection of rabbinic laws and teachings, in tractate Berakhot, uses the example of the biblical Isaac to illustrate this principle."}
]
# with open(self.STYLE_GUIDE_FILE, "r") as fin:
# cin = csv.reader(fin)
# for row in list(cin)[4:]:
# rules.append(row[0].strip())
return rules

def _get_all_titles(self) -> list[str]:
return [r['title'] for r in self._rules]

def _get_title_prompt_uses(self, prompt: str) -> Optional[str]:
    """
    Ask the LLM which style guide title, if any, is mentioned in `prompt`.

    :param prompt: text that may mention one of the titles in the style guide
    :return: the mentioned title exactly as it appears in the style guide, or
        None when the model reports no title ("N/A"), returns no <title> tag,
        or returns something that is not one of the known titles.
    """
    known_titles = self._get_all_titles()
    system = SystemMessage(content="Given a list of titles of classic Jewish books, output the title that is mentioned in the input string. Titles are wrapped in <titles> tags. Input string is wrapped in <input> tags. Output the title mentioned in <title> tags. If no title in <titles> is mentioned, output <title>N/A</title>.")
    human = HumanMessage(content=f"<titles>{', '.join(known_titles)}</titles>\n<input>{prompt}</input>")
    llm = ChatOpenAI(model="gpt-4o", temperature=0)
    response = llm([system, human])
    # NOTE(review): assumes get_by_xml_tag returns a str, or None when the tag is absent - confirm.
    title = get_by_xml_tag(response.content, 'title')
    if title is None:
        return None
    # The model may pad the tag content with whitespace.
    title = title.strip()
    # Model output is not guaranteed to be one of the offered titles. Anything
    # else (including the "N/A" sentinel) has no rule to apply, so treat it as
    # "no title found".
    if title not in known_titles:
        return None
    return title

def _get_gloss_by_title(self, title: str) -> Optional[str]:
for rule in self._rules:
if rule['title'] == title:
return rule['gloss']

def rewrite_prompt(self, prompt: str) -> str:
title = self._get_title_prompt_uses(prompt)
if title is None:
return prompt
gloss = self._get_gloss_by_title(title)
return self._rewrite_prompt_to_match_example(prompt, title, gloss)

@staticmethod
def _rewrite_prompt_to_match_rule(prompt: str, title: str, gloss: str) -> str:
    """
    Ask the LLM to rewrite `prompt` so that `title` is explained with `gloss`.

    :param prompt: text that mentions `title`
    :param title: title of the work mentioned in `prompt`
    :param gloss: descriptive clause that should replace any existing gloss for `title`
    :return: content of the <output> tag in the model response
        (NOTE(review): presumably None when the tag is missing - confirm
        against get_by_xml_tag)
    """
    # Each field description sits on its own line and every tag reference is
    # closed, so the model sees well-formed instructions.
    system = SystemMessage(content="Goal: Rewrite <input> so that when it discusses <title> it uses <gloss> as a dependent clause to explain what <title> is.\n"
                                   "Input:\n<input>: string that mentions <title>. Remove any gloss for <title> and replace it with <gloss>.\n"
                                   "<title>: Title of work mentioned in <input>.\n"
                                   "<gloss>: Gloss of work mentioned in <input>. Should be added as a dependent clause to explain what <title> is.\n"
                                   "Output: Output the rewritten <input> using the <gloss>. Output should be wrapped in <output> tags. Refrain from changing anything else in <input> besides the gloss for <title>.\n"
                                   "Example:\n"
                                   "<input>The city of Jerusalem has a unique role in rendering its inhabitants righteous, establishing a foundation for justice. Bereshit Rabbah, a collection of rabbinic interpretations of the Book of Genesis, discusses the role of Malkitzedek, the king of Salem (Jerusalem), in revealing the laws of the High Priesthood and Torah precepts to Abraham.</input>\n"
                                   "<title>Bereshit Rabbah</title>\n"
                                   "<gloss>a talmudic-era midrashic work on the book of Genesis</gloss>\n"
                                   "<output>The city of Jerusalem has a unique role in rendering its inhabitants righteous, establishing a foundation for justice. Bereshit Rabbah, a talmudic-era midrashic work on the book of Genesis, discusses the role of Malkitzedek, the king of Salem (Jerusalem), in revealing the laws of the High Priesthood and Torah precepts to Abraham.</output>")
    human = HumanMessage(content=f"<input>{prompt}</input>\n<title>{title}</title>\n<gloss>{gloss}</gloss>")
    llm = ChatOpenAI(model="gpt-4o", temperature=0)
    response = llm([system, human])
    return get_by_xml_tag(response.content, "output")

@staticmethod
def _rewrite_prompt_to_match_example(prompt: str, title: str, example: str) -> str:
    """
    Ask the LLM to rewrite `prompt` so that `title` is introduced the way `example` introduces it.

    :param prompt: text that mentions `title`
    :param title: title of the work mentioned in `prompt`
    :param example: sample text demonstrating how `title` should be explained
    :return: content of the <output> tag in the model response
        (NOTE(review): presumably None when the tag is missing - confirm
        against get_by_xml_tag)
    """
    # Each field description sits on its own line and the goal is a complete
    # sentence, so the model sees well-formed instructions.
    system = SystemMessage(content="Goal: Rewrite <input> so that it introduces <title> in the same way that it is introduced in <example>. DON'T use any other content in <example> except the explanation of <title>.\n"
                                   "Input:\n<input>: string that mentions <title>. Remove any gloss for <title> and replace it with the gloss used in <example>.\n"
                                   "<title>: Title of work mentioned in <input>.\n"
                                   "<example>: An example of how to explain <title>.\n"
                                   "Output: Output the rewritten <input> using the explanation of <title> in <example>. Output should be wrapped in <output> tags. Refrain from changing anything else in <input> besides the gloss for <title>.\n"
                                   "Example:\n"
                                   "<input>Bereshit Rabbah, a collection of rabbinic interpretations of the Book of Genesis, discusses the role of Malkitzedek, the king of Salem (Jerusalem), in revealing the laws of the High Priesthood and Torah precepts to Abraham.</input>\n"
                                   "<title>Bereshit Rabbah</title>\n"
                                   "<example>Bereshit Rabbah, a talmudic-era midrashic work on the book of Genesis, expands upon the biblical narrative in which God warns Isaac against leaving the land of Israel.</example>\n"
                                   "<output>Bereshit Rabbah, a talmudic-era midrashic work on the book of Genesis, discusses the role of Malkitzedek, the king of Salem (Jerusalem), in revealing the laws of the High Priesthood and Torah precepts to Abraham.</output>")
    human = HumanMessage(content=f"<input>{prompt}</input>\n<title>{title}</title>\n<example>{example}</example>")
    llm = ChatOpenAI(model="gpt-4o", temperature=0)
    response = llm([system, human])
    return get_by_xml_tag(response.content, "output")


if __name__ == '__main__':
    # Manual smoke run: show the loaded rules, then rewrite one sample
    # sentence that mentions the Mishnah and print the result.
    style_guide = StyleGuide()
    print(style_guide._rules)
    sample_prompt = "The Mishnah Berakhot, a tractate of the Talmud, discusses the importance of intention in the act of sacrifice, and the specific consequences when a sacrifice is made not for its own sake."
    rewritten_prompt = style_guide.rewrite_prompt(sample_prompt)
    print(rewritten_prompt)

0 comments on commit 6a5aa7a

Please sign in to comment.