Skip to content

Commit

Permalink
Fixing flaky test_docs_pickle via retrying (#249)
Browse files Browse the repository at this point in the history
* Using tempfile for docs creation

* Fixing flaky test_docs_pickle by using retrying

* fixed pre-commit

---------

Co-authored-by: Andrew White <white.d.andrew@gmail.com>
  • Loading branch information
jamesbraza and whitead authored Mar 8, 2024
1 parent 7d70c6b commit a50dff1
Showing 1 changed file with 29 additions and 25 deletions.
54 changes: 29 additions & 25 deletions tests/test_paperqa.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
import pickle
import tempfile
from io import BytesIO
from unittest import IsolatedAsyncioTestCase

import numpy as np
import pytest
import requests
from openai import AsyncOpenAI

Expand Down Expand Up @@ -1013,57 +1015,59 @@ async def test_adoc_match(self):
assert len(sources) > 0


def test_docs_pickle():
doc_path = "example.html"
with open(doc_path, "w", encoding="utf-8") as f:
def test_docs_pickle() -> None:
# 1. Fill out docs
with tempfile.NamedTemporaryFile(mode="r+", encoding="utf-8", suffix=".html") as f:
# fetch a specific Wikipedia article to use as the test document
r = requests.get("https://en.wikipedia.org/wiki/Take_Your_Dog_to_Work_Day")
r.raise_for_status()
f.write(r.text)
docs = Docs(
llm_model=OpenAILLMModel(config=dict(temperature=0.0, model="gpt-3.5-turbo"))
)
assert docs._client is not None
old_config = docs.llm_model.config
old_sconfig = docs.summary_llm_model.config # type: ignore[union-attr]
docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now", chunk_chars=1000) # type: ignore[arg-type]
os.remove(doc_path)
docs = Docs(
llm_model=OpenAILLMModel(
config=dict(temperature=0.0, model="gpt-3.5-turbo")
)
)
assert docs._client is not None
old_config = docs.llm_model.config
old_sconfig = docs.summary_llm_model.config # type: ignore[union-attr]
docs.add(f.name, "WikiMedia Foundation, 2023, Accessed now", chunk_chars=1000) # type: ignore[arg-type]

# 2. Pickle and unpickle, checking that the unpickled copy is intact
docs_pickle = pickle.dumps(docs)
docs2 = pickle.loads(docs_pickle)
# make sure it fails if we haven't set client
try:
with pytest.raises(ValueError, match="forget to set it after pickling"):
docs2.query("What date is bring your dog to work in the US?")
except ValueError:
pass
docs2.set_client()
assert docs2._client is not None
assert docs2.llm_model.config == old_config
assert docs2.summary_llm_model.config == old_sconfig
assert len(docs.docs) == len(docs2.docs)
context1, context2 = (
docs.get_evidence(
for _ in range(4): # Retry a few times, because this is flaky
docs_context = docs.get_evidence(
Answer(
question="What date is bring your dog to work in the US?",
summary_length="about 20 words",
),
k=3,
max_sources=1,
).context,
docs2.get_evidence(
).context
docs2_context = docs2.get_evidence(
Answer(
question="What date is bring your dog to work in the US?",
summary_length="about 20 words",
),
k=3,
max_sources=1,
).context,
)
print(context1)
print(context2)
assert strings_similarity(context1, context2) >= 0.5
).context
if strings_similarity(s1=docs_context, s2=docs2_context) > 0.75:
break
else:
raise AssertionError("Failed to attain similar contexts, even with retrying.")

# make sure we can query
docs.query("What date is bring your dog to work in the US?")

# make sure we can embed documents
# make sure we can still embed documents
docs2.add_url(
"https://en.wikipedia.org/wiki/Frederick_Bates_(politician)",
citation="WikiMedia Foundation, 2023, Accessed now",
Expand Down

0 comments on commit a50dff1

Please sign in to comment.