diff --git a/experiments/topic_source_curation/scripts/make_ref_topic_links.py b/experiments/topic_source_curation/scripts/make_ref_topic_links.py index 7b0c88d..4226d17 100644 --- a/experiments/topic_source_curation/scripts/make_ref_topic_links.py +++ b/experiments/topic_source_curation/scripts/make_ref_topic_links.py @@ -3,7 +3,7 @@ from experiments.topic_source_curation.curator import get_topics_to_curate import django -from sefaria.model import RefTopicLink +from sefaria.model import RefTopicLink, Ref from sefaria.model import Topic as SefariaTopic from sefaria.helper.llm.topic_prompt import make_llm_topic django.setup() @@ -13,7 +13,7 @@ def _make_ref_topic_link(topic, tref, context, i): return { "toTopic": topic.slug, - "ref": tref, + "ref": Ref(tref).normal(), "linkType": "about", "class": "refTopic", "dataSource": "learning-team", @@ -25,7 +25,7 @@ def _make_ref_topic_link(topic, tref, context, i): }, "descriptions": { "en": { - "ai_context": context, + "ai_context": "N/A", "published": False, "review_state": "not reviewed", } @@ -35,28 +35,55 @@ def _make_ref_topic_link(topic, tref, context, i): def save_ref_topic_links(): import json - with open("data/private/ref_topic_links.json", "r") as fin: + with open("scripts/ref_topic_links.json", "r") as fin: links = json.load(fin) len(links) for link in links: + try: + existing_links = RefTopicLinkSet({ + "toTopic": link["toTopic"], + "ref": link["ref"], + "linkType": link["linkType"], + "dataSource": link["dataSource"], + }) + except Exception as e: + continue + print(e) + if existing_links: + existing_links.delete() RefTopicLink(link).save() def _generate_all_prompts(): from tqdm import tqdm - slugs_to_generate = {l.toTopic for l in RefTopicLinkSet({"generatedBy": "auto-curator"})} slugs_to_generate = [ - 'balaam', - 'caleb', - 'parents', - 'parah-adumah', - 'hunger', - 'disability', - 'aarons-death', - 'josephs-dream', - 'empathy', - 'leviathan', - 'memory', + "naaman", + "nephilim", + "naftali", + "sisera", + "sennacherib", + "serah-the-daughter-of-asher", + "iddo", + "obadiah", + "og", + "uzziah", + "ezra", + "achan", + "the-sons-of-eli", + "eli", + "amos", + "amram", + "amasa", + "efron", + "er-(firstborn-son-of-judah)", + "esau", + "potiphar", + "the-concubine-of-givah", + "pharaoh", + "zelophehad", + "keturah", + "cain", ] + for slug in tqdm(slugs_to_generate): _generate_prompts_for_slug(slug) @@ -72,12 +99,10 @@ def _generate_prompts_for_slug(slug): if __name__ == '__main__': links = [] - # topics = random.sample(get_topics_to_curate(), 50) - topics = [make_llm_topic(SefariaTopic.init(slug)) for slug in [ - 'creation-of-man' - ]] + # topics = get_topics_to_curate()[94:120] + topics = [make_llm_topic(SefariaTopic.init('abel'))] for topic in topics: - print(topic.slug) + print(f'"{topic.slug}",') with open(f"output/curation_{topic.slug}.json", "r") as fin: curation = json.load(fin) for i, entry in enumerate(curation):