From f7c660f94687461364ac9710c77cae54cc99cda7 Mon Sep 17 00:00:00 2001 From: Andrew White Date: Thu, 22 Feb 2024 09:08:41 -0800 Subject: [PATCH] Propagated extra props in JSON (#235) --- .ruff.toml | 4 ++-- paperqa/docs.py | 1 + paperqa/types.py | 3 +++ paperqa/version.py | 2 +- tests/test_paperqa.py | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 680291923..8f23e932b 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -1,2 +1,2 @@ -# Allow lines to be as long as 120 characters. -line-length = 120 +# Allow lines to be longer. +line-length = 180 diff --git a/paperqa/docs.py b/paperqa/docs.py index a05fd2cde..acace0b04 100644 --- a/paperqa/docs.py +++ b/paperqa/docs.py @@ -760,6 +760,7 @@ async def process(match): context_str = "\n\n".join( [ f"{c.text.name}: {c.context}" + + "".join([f"\n{k}: {v}" for k, v in c.model_extra.items()]) + (f"\n\nBased on {c.text.doc.citation}" if detailed_citations else "") for c in answer.contexts ] diff --git a/paperqa/types.py b/paperqa/types.py index a73beac11..0a954b5ac 100644 --- a/paperqa/types.py +++ b/paperqa/types.py @@ -152,6 +152,9 @@ class Context(BaseModel): context: str text: Text score: int = 5 + model_config = ConfigDict( + extra="allow", + ) def __str__(self) -> str: diff --git a/paperqa/version.py b/paperqa/version.py index ad17dad83..d91121f89 100644 --- a/paperqa/version.py +++ b/paperqa/version.py @@ -1 +1 @@ -__version__ = "4.0.0-pre.7" +__version__ = "4.0.0-pre.8" diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index 5af004ff8..6cea22313 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -522,6 +522,47 @@ def test_json_evidence(): os.remove(doc_path) +def test_custom_json_props(): + doc_path = "example.html" + with open(doc_path, "w", encoding="utf-8") as f: + # get wiki page about politician + r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") + f.write(r.text) + 
summary_llm = OpenAILLMModel( + config=dict( + model="gpt-3.5-turbo-0125", + response_format=dict(type="json_object"), + temperature=0.0, + ) + ) + my_prompts = PromptCollection( + json_summary=True, + summary_json_system="Provide a summary of the excerpt that could help answer the question based on the excerpt. " + "The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. " + "Respond with the following JSON format:\n\n" + '{{\n"summary": "...",\n"person_name": "...",\n"relevance_score": "..."}}\n\n' + "where `summary` is relevant information from text - " + "about 100 words words, `person_name` specifies the person discussed in " + "the excerpt (may be different than query), and `relevance_score` is " + "the relevance of `summary` to answer the question (integer out of 10).", + ) + docs = Docs( + prompts=my_prompts, + summary_llm_model=summary_llm, + llm_result_callback=print_callback, + ) + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + evidence = docs.get_evidence( + Answer(question="For which state was Bates a governor?"), k=1, max_sources=1 + ) + assert "person_name" in evidence.contexts[0].model_extra + assert "person_name: " in evidence.context + print(evidence.context) + answer = docs.query("What is Frederick Bates's greatest accomplishment?") + assert "person_name" in answer.context + os.remove(doc_path) + + def test_query(): docs = Docs() docs.add_url(