Skip to content

Commit

Permalink
ArxivAPIWrapper - doc_content_chars_max (#6063)
Browse files Browse the repository at this point in the history
This PR refactors the ArxivAPIWrapper class, making the
`doc_content_chars_max` parameter optional. Additionally, tests have
been added to verify the behavior of the `doc_content_chars_max`
parameter.

Fixes #6027 (issue)
  • Loading branch information
hp0404 authored Jun 16, 2023
1 parent a9b97aa commit b01cf0d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
4 changes: 2 additions & 2 deletions langchain/utilities/arxiv.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Util that calls Arxiv."""
import logging
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, root_validator

Expand Down Expand Up @@ -38,7 +38,7 @@ class ArxivAPIWrapper(BaseModel):
ARXIV_MAX_QUERY_LENGTH = 300
load_max_docs: int = 100
load_all_available_meta: bool = False
doc_content_chars_max: int = 4000
doc_content_chars_max: Optional[int] = 4000

class Config:
"""Configuration for this pydantic object."""
Expand Down
18 changes: 18 additions & 0 deletions tests/integration_tests/utilities/test_arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,24 @@ def test_load_returns_limited_docs() -> None:
assert_docs(docs)


def test_load_returns_limited_doc_content_chars() -> None:
    """Check that page content is cut to exactly ``doc_content_chars_max``."""
    char_limit = 100
    wrapper = ArxivAPIWrapper(doc_content_chars_max=char_limit)

    # Load a single paper by its arXiv identifier and inspect the first result.
    first_doc = wrapper.load("1605.08386")[0]

    assert len(first_doc.page_content) == char_limit


def test_load_returns_unlimited_doc_content_chars() -> None:
    """Check that ``doc_content_chars_max=None`` disables content truncation.

    The paper's full text is expected to be about 54337 characters long.
    The length is compared with a small tolerance rather than exact
    equality.
    """
    # NOTE(review): the exact character count presumably varies with the
    # PDF text-extraction backend/version used by ``load`` - confirm. The
    # tolerance keeps the test meaningful without pinning one exact build.
    expected_chars = 54337
    relative_tolerance = 0.01
    # Mirrors the default value of ``doc_content_chars_max`` (4000).
    default_chars_max = 4000

    api_client = ArxivAPIWrapper(doc_content_chars_max=None)
    docs = api_client.load("1605.08386")
    content_length = len(docs[0].page_content)

    # With the limit disabled, the content must exceed the default cap.
    assert content_length > default_chars_max
    # And it should still be close to the known full-text length.
    assert abs(content_length - expected_chars) <= expected_chars * relative_tolerance


def test_load_returns_full_set_of_metadata() -> None:
"""Test that returns several docs"""
api_client = ArxivAPIWrapper(load_max_docs=1, load_all_available_meta=True)
Expand Down

0 comments on commit b01cf0d

Please sign in to comment.