diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 381f2bdf9..baccccee5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,11 +20,11 @@ repos: - id: black language_version: python3 - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.3.0" + rev: v1.8.0 hooks: - id: mypy args: [--pretty, --ignore-missing-imports] - additional_dependencies: [types-requests] + additional_dependencies: [types-requests, types-setuptools] - repo: https://github.com/PyCQA/isort rev: "5.12.0" hooks: diff --git a/dev-requirements.txt b/dev-requirements.txt index c56d05280..11052e634 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,6 +5,7 @@ pyzotero python-dotenv pymupdf build +types-setuptools types-requests langchain_openai langchain_community diff --git a/paperqa/contrib/zotero.py b/paperqa/contrib/zotero.py index 1d4330dc9..7640e499f 100644 --- a/paperqa/contrib/zotero.py +++ b/paperqa/contrib/zotero.py @@ -128,7 +128,7 @@ def get_pdf(self, item: dict) -> Union[Path, None]: if pdf_key is None: return None - pdf_path: Path = Path(self.storage / (pdf_key + ".pdf")) # type: ignore + pdf_path: Path = Path(self.storage / (pdf_key + ".pdf")) # type: ignore[operator] if not pdf_path.exists(): pdf_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/paperqa/docs.py b/paperqa/docs.py index acace0b04..a70f38f9a 100644 --- a/paperqa/docs.py +++ b/paperqa/docs.py @@ -422,7 +422,7 @@ async def aadd( # get first name and year from citation match = re.search(r"([A-Z][a-z]+)", citation) if match is not None: - author = match.group(1) # type: ignore + author = match.group(1) else: # panicking - no word?? raise ValueError( @@ -433,7 +433,7 @@ async def aadd( year = "" match = re.search(r"(\d{4})", citation) if match is not None: - year = match.group(1) # type: ignore + year = match.group(1) docname = f"{author}{year}" docname = self._get_unique_name(docname) doc = Doc(docname=docname, citation=citation, dockey=dockey) @@ -619,7 +619,7 @@ async def aget_evidence( ) -> Answer: if len(self.docs) == 0 and self.docs_index is None: # do we have no docs? - return answer + return answer # type: ignore[unreachable] self._build_texts_index(keys=answer.dockey_filter) _k = k if answer.dockey_filter is not None: @@ -663,13 +663,13 @@ async def process(match): score = 5 else: if self.prompts.json_summary: - summary_chain = self.summary_llm_model.make_chain( + summary_chain = self.summary_llm_model.make_chain( # type: ignore[union-attr] client=self._client, prompt=self.prompts.summary_json, system_prompt=self.prompts.summary_json_system, ) else: - summary_chain = self.summary_llm_model.make_chain( + summary_chain = self.summary_llm_model.make_chain( # type: ignore[union-attr] client=self._client, prompt=self.prompts.summary, system_prompt=self.prompts.system, diff --git a/paperqa/llms.py b/paperqa/llms.py index d7c7fa182..1d70c4c5e 100644 --- a/paperqa/llms.py +++ b/paperqa/llms.py @@ -205,9 +205,9 @@ async def execute( f for f in callbacks if not is_coroutine_callable(f) ] async_callbacks = [f for f in callbacks if is_coroutine_callable(f)] - completion = self.achat_iter(client, messages) # type: ignore + completion = self.achat_iter(client, messages) text_result = [] - async for chunk in completion: # type: ignore + async for chunk in completion: # type: ignore[attr-defined] if chunk: if result.seconds_to_first_token == 0: result.seconds_to_first_token = ( @@ -250,12 +250,12 @@ async def execute( ] async_callbacks = [f for f in callbacks if is_coroutine_callable(f)] - completion = self.acomplete_iter( # type: ignore + completion = self.acomplete_iter( client, formatted_prompt, ) text_result = [] - async for chunk in completion: # type: ignore + async for chunk in completion: # type: ignore[attr-defined] if chunk: if result.seconds_to_first_token == 0: result.seconds_to_first_token = ( @@ -289,7 +289,7 @@ def _check_client(self, client: Any) -> AsyncOpenAI: raise ValueError( f"Your client is not a required AsyncOpenAI client. It is a {type(client)}" ) - return cast(AsyncOpenAI, client) + return client @model_validator(mode="after") @classmethod @@ -323,14 +323,14 @@ async def acomplete_iter(self, client: Any, prompt: str) -> Any: async def achat(self, client: Any, messages: list[dict[str, str]]) -> str: aclient = self._check_client(client) completion = await aclient.chat.completions.create( - messages=messages, **process_llm_config(self.config) # type: ignore + messages=messages, **process_llm_config(self.config) ) return completion.choices[0].message.content or "" async def achat_iter(self, client: Any, messages: list[dict[str, str]]) -> Any: aclient = self._check_client(client) completion = await aclient.chat.completions.create( - messages=messages, **process_llm_config(self.config), stream=True # type: ignore + messages=messages, **process_llm_config(self.config), stream=True ) async for chunk in cast(AsyncGenerator, completion): yield chunk.choices[0].delta.content @@ -635,13 +635,13 @@ def add_texts_and_embeddings(self, texts: Sequence[Embeddable]) -> None: else: raise ValueError("Only embeddings of type Text are supported") if self._store is None: - self._store = self._store_builder( # type: ignore + self._store = self._store_builder( vec_store_text_and_embeddings, texts, ) if self._store is None or not hasattr(self._store, "add_embeddings"): raise ValueError("store_builder did not return a valid vectorstore") - self._store.add_embeddings( # type: ignore + self._store.add_embeddings( vec_store_text_and_embeddings, metadatas=texts, ) diff --git a/paperqa/types.py b/paperqa/types.py index 0a954b5ac..8d051db83 100644 --- a/paperqa/types.py +++ b/paperqa/types.py @@ -192,7 +192,7 @@ def remove_computed(cls, data: Any) -> Any: data.pop("used_contexts", None) return data - @computed_field # type: ignore + @computed_field # type: ignore[misc] @property def used_contexts(self) -> set[str]: """Return the used contexts.""" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b0ff3d329 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,49 @@ +[tool.mypy] +# Type-checks the interior of functions without type annotations. +check_untyped_defs = true +# Allows enabling one or multiple error codes globally. Note: This option will +# override disabled error codes from the disable_error_code option. +enable_error_code = [ + "ignore-without-code", + "mutable-override", + "redundant-cast", + "redundant-expr", + "redundant-self", + "truthy-bool", + "truthy-iterable", + "unreachable", + "unused-awaitable", + "unused-ignore", +] +# Shows a short summary line after error messages. +error_summary = false +# Use visually nicer output in error messages: use soft word wrap, show source +# code snippets, and show error location markers. +pretty = true +# Shows column numbers in error messages. +show_column_numbers = true +# Shows error codes in error messages. +# SEE: https://mypy.readthedocs.io/en/stable/error_codes.html#error-codes +show_error_codes = true +# Prefixes each error with the relevant context. +show_error_context = true +# Warns about casting an expression to its inferred type. +warn_redundant_casts = true +# Shows a warning when encountering any code inferred to be unreachable or +# redundant after performing type analysis. +warn_unreachable = true +# Warns about per-module sections in the config file that do not match any +# files processed when invoking mypy. +warn_unused_configs = true +# Warns about unneeded `# type: ignore` comments. +warn_unused_ignores = true + +[[tool.mypy.overrides]] +# Suppresses error messages about imports that cannot be resolved. +ignore_missing_imports = true +# Per-module configuration options +module = [ + "fitz", + "pyzotero", # SEE: https://github.com/urschrei/pyzotero/issues/110 + "sentence_transformers", # SEE: https://github.com/UKPLab/sentence-transformers/issues/1723 +] diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index 6cea22313..cf2c2a7d6 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -413,13 +413,13 @@ async def test_chain_completion(self): def accum(x): outputs.append(x) - completion = await call(dict(animal="duck"), callbacks=[accum]) + completion = await call(dict(animal="duck"), callbacks=[accum]) # type: ignore[call-arg] assert completion.seconds_to_first_token > 0 assert completion.prompt_count > 0 assert completion.completion_count > 0 assert str(completion) == "".join(outputs) - completion = await call(dict(animal="duck")) + completion = await call(dict(animal="duck")) # type: ignore[call-arg] assert completion.seconds_to_first_token == 0 assert completion.seconds_to_last_token > 0 @@ -438,13 +438,13 @@ async def test_chain_chat(self): def accum(x): outputs.append(x) - completion = await call(dict(animal="duck"), callbacks=[accum]) + completion = await call(dict(animal="duck"), callbacks=[accum]) # type: ignore[call-arg] assert completion.seconds_to_first_token > 0 assert completion.prompt_count > 0 assert completion.completion_count > 0 assert str(completion) == "".join(outputs) - completion = await call(dict(animal="duck")) + completion = await call(dict(animal="duck")) # type: ignore[call-arg] assert completion.seconds_to_first_token == 0 assert completion.seconds_to_last_token > 0 @@ -452,7 +452,7 @@ def accum(x): async def ac(x): pass - completion = await call(dict(animal="duck"), callbacks=[accum, ac]) + completion = await call(dict(animal="duck"), callbacks=[accum, ac]) # type: ignore[call-arg] def test_docs(): @@ -474,7 +474,7 @@ def test_evidence(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] evidence = docs.get_evidence( Answer(question="For which state was Bates a governor?"), k=1, max_sources=1 ) @@ -507,7 +507,7 @@ def test_json_evidence(): summary_llm_model=summary_llm, llm_result_callback=print_callback, ) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] evidence = docs.get_evidence( Answer(question="For which state was Bates a governor?"), k=1, max_sources=1 ) @@ -551,7 +551,7 @@ def test_custom_json_props(): summary_llm_model=summary_llm, llm_result_callback=print_callback, ) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] evidence = docs.get_evidence( Answer(question="For which state was Bates a governor?"), k=1, max_sources=1 ) @@ -664,7 +664,7 @@ def test_sentence_transformer_embedding(): citation="WikiMedia Foundation, 2023, Accessed now", dockey="test", ) - assert any(docs.docs["test"].embedding) + assert any(docs.docs["test"].embedding) # type: ignore[arg-type] docs = Docs( texts_index=NumpyVectorStore( @@ -679,7 +679,7 @@ def test_sentence_transformer_embedding(): citation="WikiMedia Foundation, 2023, Accessed now", dockey="test", ) - assert any(docs.docs["test"].embedding) + assert any(docs.docs["test"].embedding) # type: ignore[arg-type] def test_custom_llm(): @@ -762,7 +762,7 @@ def test_langchain_llm(): get_callbacks=lambda x: [lambda y: print(y, end="")], ) - assert docs.summary_llm_model.llm_type == "completion" + assert docs.summary_llm_model.llm_type == "completion" # type: ignore[union-attr] # trying without callbacks (different codepath) docs.get_evidence( @@ -823,19 +823,19 @@ async def test_langchain_vector_store(self): try: index = LangchainVectorStore() index.add_texts_and_embeddings(some_texts) - raise "Failed to check for builder" + raise "Failed to check for builder" # type: ignore[misc] except ValueError: pass try: index = LangchainVectorStore(store_builder=lambda x: None) - raise "Failed to count arguments" + raise "Failed to count arguments" # type: ignore[misc] except ValueError: pass try: index = LangchainVectorStore(store_builder="foo") - raise "Failed to check if builder is callable" + raise "Failed to check if builder is callable" # type: ignore[misc] except ValueError: pass @@ -847,7 +847,7 @@ async def test_langchain_vector_store(self): index.add_texts_and_embeddings(some_texts) assert index._store is not None # check search returns Text obj - data, score = await index.similarity_search(None, "test", k=1) + data, score = await index.similarity_search(None, "test", k=1) # type: ignore[unreachable] print(data) assert type(data[0]) == Text @@ -938,8 +938,8 @@ def test_docs_pickle(): ) assert docs._client is not None old_config = docs.llm_model.config - old_sconfig = docs.summary_llm_model.config - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now", chunk_chars=1000) + old_sconfig = docs.summary_llm_model.config # type: ignore[union-attr] + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now", chunk_chars=1000) # type: ignore[arg-type] os.remove(doc_path) docs_pickle = pickle.dumps(docs) docs2 = pickle.loads(docs_pickle) @@ -989,7 +989,7 @@ def test_bad_context(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] answer = docs.query("What is the radius of Jupyter?") assert "cannot answer" in answer.answer os.remove(doc_path) @@ -1004,9 +1004,9 @@ def test_repeat_keys(): docs = Docs( llm_model=OpenAILLMModel(config=dict(temperature=0.0, model="babbage-002")) ) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] try: - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] except ValueError: pass assert len(docs.docs) == 1 @@ -1017,7 +1017,7 @@ def test_repeat_keys(): # get wiki page about politician f.write(r.text) f.write("\n") # so we don't have same hash - docs.add(doc_path2, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path2, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] assert len(docs.docs) == 2 # check keys @@ -1033,7 +1033,7 @@ def test_pdf_reader(): tests_dir = os.path.dirname(os.path.abspath(__file__)) doc_path = os.path.join(tests_dir, "paper.pdf") docs = Docs(llm_model=OpenAILLMModel(config=dict(temperature=0.0, model="gpt-4"))) - docs.add(doc_path, "Wellawatte et al, XAI Review, 2023") + docs.add(doc_path, "Wellawatte et al, XAI Review, 2023") # type: ignore[arg-type] answer = docs.query("Are counterfactuals actionable? [yes/no]") assert "yes" in answer.answer or "Yes" in answer.answer @@ -1067,14 +1067,14 @@ def test_pdf_pypdf_reader(): tests_dir = os.path.dirname(os.path.abspath(__file__)) doc_path = os.path.join(tests_dir, "paper.pdf") splits1 = read_doc( - doc_path, + doc_path, # type: ignore[arg-type] Doc(docname="foo", citation="Foo et al, 2002", dockey="1"), force_pypdf=True, overlap=100, chunk_chars=3000, ) splits2 = read_doc( - doc_path, + doc_path, # type: ignore[arg-type] Doc(docname="foo", citation="Foo et al, 2002", dockey="1"), force_pypdf=False, overlap=100, @@ -1093,7 +1093,7 @@ def test_prompt_length(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] docs.query("What is the name of the politician?") @@ -1103,7 +1103,7 @@ def test_code(): docs = Docs( llm_model=OpenAILLMModel(config=dict(temperature=0.0, model="babbage-002")) ) - docs.add(doc_path, "test_paperqa.py", docname="test_paperqa.py", disable_check=True) + docs.add(doc_path, "test_paperqa.py", docname="test_paperqa.py", disable_check=True) # type: ignore[arg-type] assert len(docs.docs) == 1 docs.query("What function tests the preview?") @@ -1115,7 +1115,7 @@ def test_citation(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path) + docs.add(doc_path) # type: ignore[arg-type] assert ( list(docs.docs.values())[0].docname == "Wikipedia2024" or list(docs.docs.values())[0].docname == "Frederick2024" @@ -1132,12 +1132,12 @@ def test_dockey_filter(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] # add with new dockey with open("example.txt", "w", encoding="utf-8") as f: f.write(r.text) f.write("\n") # so we don't have same hash - docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", dockey="test") + docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", dockey="test") # type: ignore[arg-type] answer = Answer(question="What country is Bates from?", dockey_filter=["test"]) docs.get_evidence(answer) @@ -1150,12 +1150,12 @@ def test_dockey_delete(): r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) docs = Docs() - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] # add with new dockey with open("example.txt", "w", encoding="utf-8") as f: f.write(r.text) f.write("\n\nBates could be from Angola") # so we don't have same hash - docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", docname="test") + docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", docname="test") # type: ignore[arg-type] answer = Answer(question="What country was Bates born in?") answer = docs.get_evidence( answer, max_sources=25, k=30 @@ -1182,14 +1182,14 @@ def test_query_filter(): f.write(r.text) docs = Docs() docs.add( - doc_path, + doc_path, # type: ignore[arg-type] "Information about Fredrick Bates, WikiMedia Foundation, 2023, Accessed now", ) # add with new dockey with open("example.txt", "w", encoding="utf-8") as f: f.write(r.text) f.write("\n") # so we don't have same hash - docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", dockey="test") + docs.add("example.txt", "WikiMedia Foundation, 2023, Accessed now", dockey="test") # type: ignore[arg-type] docs.query("What country is Bates from?", key_filter=True) # the filter shouldn't trigger, so just checking that it doesn't crash @@ -1212,13 +1212,13 @@ def test_too_much_evidence(): r = requests.get("https://en.wikipedia.org/wiki/Barack_Obama") f.write(r.text) docs = Docs(llm="gpt-3.5-turbo", summary_llm="gpt-3.5-turbo") - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] # add with new dockey with open("example.txt", "w", encoding="utf-8") as f: f.write(r.text) f.write("\n") # so we don't have same hash docs.add( - "example.txt", + "example.txt", # type: ignore[arg-type] "WikiMedia Foundation, 2023, Accessed now", dockey="test", chunk_chars=4000, @@ -1240,7 +1240,7 @@ def test_custom_prompts(): # get wiki page about politician r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] answer = docs.query("What country is Frederick Bates from?") assert "United States" in answer.answer @@ -1255,7 +1255,7 @@ def test_pre_prompt(): # get wiki page about politician r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] docs.query("What country is Bates from?") @@ -1275,7 +1275,7 @@ def test_post_prompt(): # get wiki page about politician r = requests.get("https://en.wikipedia.org/wiki/Frederick_Bates_(politician)") f.write(r.text) - docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") + docs.add(doc_path, "WikiMedia Foundation, 2023, Accessed now") # type: ignore[arg-type] docs.query("What country is Bates from?") docs = Docs(