From f7c660f94687461364ac9710c77cae54cc99cda7 Mon Sep 17 00:00:00 2001 From: Andrew White Date: Thu, 22 Feb 2024 09:08:41 -0800 Subject: [PATCH] Propagated extra props in JSON (#235) --- .ruff.toml | 4 ++-- paperqa/docs.py | 1 + paperqa/types.py | 3 +++ paperqa/version.py | 2 +- tests/test_paperqa.py | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 680291923..8f23e932b 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,2 +1,2 @@ -# Allow lines to be as long as 120 characters. -line-length = 120 +# Allow lines to be longer. +line-length = 180 diff --git a/paperqa/docs.py b/paperqa/docs.py index a05fd2cde..acace0b04 100644 --- a/paperqa/docs.py +++ b/paperqa/docs.py @@ -760,6 +760,7 @@ async def process(match): context_str = "\n\n".join( [ f"{c.text.name}: {c.context}" + + "".join([f"\n{k}: {v}" for k, v in c.model_extra.items()]) + (f"\n\nBased on {c.text.doc.citation}" if detailed_citations else "") for c in answer.contexts ] diff --git a/paperqa/types.py b/paperqa/types.py index a73beac11..0a954b5ac 100644 --- a/paperqa/types.py +++ b/paperqa/types.py @@ -152,6 +152,9 @@ class Context(BaseModel): context: str text: Text score: int = 5 + model_config = ConfigDict( + extra="allow", + ) def __str__(self) -> str: diff --git a/paperqa/version.py b/paperqa/version.py index ad17dad83..d91121f89 100644 --- a/paperqa/version.py +++ b/paperqa/version.py @@ -1 +1 @@ -__version__ = "4.0.0-pre.7" +__version__ = "4.0.0-pre.8" diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index 5af004ff8..6cea22313 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -522,6 +522,47 @@ def test_json_evidence(): os.remove(doc_path) +def test_custom_json_props(): + doc_path = "example.html" + with open(doc_path, "w", encoding="utf-8") as f: + # get wiki page about politician + r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") + f.write(r.text) + 
summary_llm = OpenAILLMModel( + config=dict( + model="gpt-3.5-turbo-0125", + response_format=dict(type="json_object"), + temperature=0.0, + ) + ) + my_prompts = PromptCollection( + json_summary=True, + summary_json_system="Provide a summary of the excerpt that could help answer the question based on the excerpt. " + "The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. " + "Respond with the following JSON format:\n\n" + '{{\n"summary": "...",\n"person_name": "...",\n"relevance_score": "..."}}\n\n' + "where `summary` is relevant information from text - " + "about 100 words words, `person_name` specifies the person discussed in " + "the excerpt (may be different than query), and `relevance_score` is " + "the relevance of `summary` to answer the question (integer out of 10).", + ) + docs = Docs( + prompts=my_prompts, + summary_llm_model=summary_llm, + llm_result_callback=print_callback, + ) + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + evidence = docs.get_evidence( + Answer(question="For which state was Bates a governor?"), k=1, max_sources=1 + ) + assert "person_name" in evidence.contexts[0].model_extra + assert "person_name: " in evidence.context + print(evidence.context) + answer = docs.query("What is Frederick Bates's greatest accomplishment?") + assert "person_name" in answer.context + os.remove(doc_path) + + def test_query(): docs = Docs() docs.add_url(