multiple: get rid of pyproject extras (#22581)

They cause `poetry lock` to take a ton of time, and `uv pip install` can resolve the constraints from these toml files in trivial time (addressing the problem with #19153). This allows us to properly upgrade lockfile dependencies moving forward, which revealed some issues that were either fixed or type-ignored (see file comments).
langchain-ai · Jun 6, 2024 · a24a9c6 · a24a9c6
1 parent 4367e89
commit a24a9c6
Show file tree

Hide file tree

Showing 34 changed files with 3,678 additions and 12,600 deletions.
diff --git a/.github/workflows/check_diffs.yml b/.github/workflows/check_diffs.yml
@@ -123,7 +123,9 @@ jobs:
         shell: bash
         run: |
           echo "Running extended tests, installing dependencies with poetry..."
-          poetry install -E extended_testing --with test
+          poetry install --with test
+          poetry run pip install uv
+          poetry run uv pip install -r extended_testing_deps.txt
 
       - name: Run extended tests
         run: make extended_tests

diff --git a/docs/docs/contributing/code.mdx b/docs/docs/contributing/code.mdx
@@ -206,30 +206,20 @@ ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogy
 
 `langchain-core` and partner packages **do not use** optional dependencies in this way.
 
-You only need to add a new dependency if a **unit test** relies on the package.
-If your package is only required for **integration tests**, then you can skip these
-steps and leave all pyproject.toml and poetry.lock files alone.
+You'll notice that `pyproject.toml` and `poetry.lock` are **not** touched when you add optional dependencies below.
 
 If you're adding a new dependency to Langchain, assume that it will be an optional dependency, and
 that most users won't have it installed.
 
 Users who do not have the dependency installed should be able to **import** your code without
 any side effects (no warnings, no errors, no exceptions).
 
-To introduce the dependency to the pyproject.toml file correctly, please do the following:
-
-1. Add the dependency to the main group as an optional dependency
-  ```bash
-  poetry add --optional [package_name]
-  ```
-2. Open pyproject.toml and add the dependency to the `extended_testing` extra
-3. Relock the poetry file to update the extra.
-  ```bash
-  poetry lock --no-update
-  ```
-4. Add a unit test that the very least attempts to import the new code. Ideally, the unit
+To introduce the dependency to a library, please do the following:
+
+1. Open extended_testing_deps.txt and add the dependency
+2. Add a unit test that at the very least attempts to import the new code. Ideally, the unit
 test makes use of lightweight fixtures to test the logic of the code.
-5. Please use the `@pytest.mark.requires(package_name)` decorator for any tests that require the dependency.
+3. Please use the `@pytest.mark.requires(package_name)` decorator for any unit tests that require the dependency.
 
 ## Adding a Jupyter Notebook
 

diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt
@@ -0,0 +1,86 @@
+aiosqlite>=0.19.0,<0.20
+aleph-alpha-client>=2.15.0,<3
+anthropic>=0.3.11,<0.4
+arxiv>=1.4,<2
+assemblyai>=0.17.0,<0.18
+atlassian-python-api>=3.36.0,<4
+azure-ai-documentintelligence>=1.0.0b1,<2
+azure-identity>=1.15.0,<2
+azure-search-documents==11.4.0
+beautifulsoup4>=4,<5
+bibtexparser>=1.4.0,<2
+cassio>=0.1.6,<0.2
+chardet>=5.1.0,<6
+cloudpathlib>=0.18,<0.19
+cloudpickle>=2.0.0
+cohere>=4,<6
+databricks-vectorsearch>=0.21,<0.22
+datasets>=2.15.0,<3
+dgml-utils>=0.3.0,<0.4
+elasticsearch>=8.12.0,<9
+esprima>=4.0.1,<5
+faiss-cpu>=1,<2
+feedparser>=6.0.10,<7
+fireworks-ai>=0.9.0,<0.10
+friendli-client>=1.2.4,<2
+geopandas>=0.13.1,<0.14
+gitpython>=3.1.32,<4
+google-cloud-documentai>=2.20.1,<3
+gql>=3.4.1,<4
+gradientai>=1.4.0,<2
+hdbcli>=2.19.21,<3
+hologres-vector==0.0.6
+html2text>=2020.1.16
+httpx>=0.24.1,<0.25
+httpx-sse>=0.4.0,<0.5
+javelin-sdk>=0.1.8,<0.2
+jinja2>=3,<4
+jq>=1.4.1,<2
+jsonschema>1
+lxml>=4.9.3,<6.0
+markdownify>=0.11.6,<0.12
+motor>=3.3.1,<4
+msal>=1.25.0,<2
+mwparserfromhell>=0.6.4,<0.7
+mwxml>=0.3.3,<0.4
+newspaper3k>=0.2.8,<0.3
+numexpr>=2.8.6,<3
+nvidia-riva-client>=2.14.0,<3
+oci>=2.119.1,<3
+openai<2
+openapi-pydantic>=0.3.2,<0.4
+oracle-ads>=2.9.1,<3
+oracledb>=2.2.0,<3
+pandas>=2.0.1,<3
+pdfminer-six>=20221105
+pgvector>=0.1.6,<0.2
+praw>=7.7.1,<8
+premai>=0.3.25,<0.4
+psychicapi>=0.8.0,<0.9
+py-trello>=0.19.0,<0.20
+pyjwt>=2.8.0,<3
+pymupdf>=1.22.3,<2
+pypdf>=3.4.0,<4
+pypdfium2>=4.10.0,<5
+pyspark>=3.4.0,<4
+rank-bm25>=0.2.2,<0.3
+rapidfuzz>=3.1.1,<4
+rapidocr-onnxruntime>=1.3.2,<2
+rdflib==7.0.0
+requests-toolbelt>=1.0.0,<2
+rspace_client>=2.5.0,<3
+scikit-learn>=1.2.2,<2
+simsimd>=4.3.1,<5
+sqlite-vss>=0.1.2,<0.2
+streamlit>=1.18.0,<2
+sympy>=1.12,<2
+telethon>=1.28.5,<2
+tidb-vector>=0.0.3,<1.0.0
+timescale-vector==0.0.1
+tqdm>=4.48.0
+tree-sitter>=0.20.2,<0.21
+tree-sitter-languages>=1.8.0,<2
+upstash-redis>=0.15.0,<0.16
+vdms==0.0.20
+xata>=1.0.0a7,<2
+xmltodict>=0.13.0,<0.14
diff --git a/libs/community/langchain_community/callbacks/tracers/wandb.py b/libs/community/langchain_community/callbacks/tracers/wandb.py
@@ -75,7 +75,7 @@ def _convert_run_to_wb_span(self, run: Run) -> "Span":
         :return: The converted Span.
         """
         attributes = {**run.extra} if run.extra else {}
-        attributes["execution_order"] = run.execution_order
+        attributes["execution_order"] = run.execution_order  # type: ignore
 
         return self.trace_tree.Span(
             span_id=str(run.id) if run.id is not None else None,

diff --git a/libs/community/langchain_community/tools/nuclia/tool.py b/libs/community/langchain_community/tools/nuclia/tool.py
@@ -220,7 +220,7 @@ def _pull_queue(self) -> None:
                 data = MessageToJson(
                     pb,
                     preserving_proto_field_name=True,
-                    including_default_value_fields=True,
+                    including_default_value_fields=True,  # type: ignore
                 )
                 self._results[matching_id]["data"] = data
 

diff --git a/libs/community/langchain_community/vectorstores/docarray/base.py b/libs/community/langchain_community/vectorstores/docarray/base.py
@@ -28,7 +28,7 @@ def _check_docarray_import() -> None:
     except ImportError:
         raise ImportError(
             "Could not import docarray python package. "
-            'Please install it with `pip install "langchain[docarray]"`.'
+            "Please install it with `pip install docarray`."
         )
 
 

diff --git a/libs/community/langchain_community/vectorstores/docarray/hnsw.py b/libs/community/langchain_community/vectorstores/docarray/hnsw.py
@@ -14,7 +14,7 @@ class DocArrayHnswSearch(DocArrayIndex):
     """`HnswLib` storage using `DocArray` package.
 
     To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
-    You can install it with `pip install "docarray[hnswlib]"`.
+    You can install it with `pip install docarray`.
     """
 
     @classmethod

diff --git a/libs/community/langchain_community/vectorstores/docarray/in_memory.py b/libs/community/langchain_community/vectorstores/docarray/in_memory.py
@@ -15,7 +15,7 @@ class DocArrayInMemorySearch(DocArrayIndex):
     """In-memory `DocArray` storage for exact search.
 
     To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
-    You can install it with `pip install "langchain[docarray]"`.
+    You can install it with `pip install docarray`.
     """
 
     @classmethod