diff --git a/.github/workflows/_run_e2e_tests.yml b/.github/workflows/_run_e2e_tests.yml index 96e7f8bde..cd77e00f3 100644 --- a/.github/workflows/_run_e2e_tests.yml +++ b/.github/workflows/_run_e2e_tests.yml @@ -122,6 +122,8 @@ jobs: UNSTRUCTURED_API_URL: "${{ secrets.E2E_TESTS_UNSTRUCTURED_API_URL }}" run: | source scripts/ci-common-env.sh + # workaround for FileLocked https://github.com/datastax/ragstack-ai/actions/runs/8629355336/job/23653432063?pr=316 + rm -rf **/.tox if [ "${{ inputs.suite-name == 'ragstack' }}" == "true" ]; then tox -c ragstack-e2e-tests elif [ "${{ inputs.suite-name == 'langchain' }}" == "true" ]; then diff --git a/pyproject.toml b/pyproject.toml index 000fc6a9c..1b59a391a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,16 +12,36 @@ packages = [{ include = "ragstack" }] [tool.poetry.dependencies] python = ">=3.9,<3.12" astrapy = "~0.7.0" -cassio = "~0.1.3" -unstructured = { version = "0.12.5" } -llama-index = { version = "0.9.48", extras = ["langchain"] } -llama-parse = { version = "0.1.4" } +cassio = "~0.1.4" +unstructured = "0.12.5" + +# llama-index +llama-index = "0.10.28" +llama-index-vector-stores-astra-db = "0.1.5" +llama-index-vector-stores-cassandra = "0.1.3" +llama-index-embeddings-langchain = "0.1.2" +llama-parse = "0.4.0" +# optional integrations +## azure +llama-index-llms-azure-openai = { version = "0.1.5", optional = true } +llama-index-embeddings-azure-openai = { version = "0.1.6", optional = true } +## bedrock +llama-index-llms-bedrock = { version = "0.1.5", optional = true } +llama-index-embeddings-bedrock = { version = "0.1.4", optional = true } +## google +llama-index-llms-gemini = { version = "0.1.7", optional = true } +llama-index-multi-modal-llms-gemini = { version = "0.1.5", optional = true } +llama-index-llms-vertex = { version = "0.1.5", optional = true } +llama-index-embeddings-gemini = { version = "0.1.6", optional = true } + + +# langchain langchain = "0.1.12" langchain-core = "0.1.31" langchain-community = "0.0.28" langchain-astradb = "0.1.0" langchain-openai = "0.0.8" -langchain-google-genai = { version = "0.0.9", optional = true } +langchain-google-genai = { version = "1.0.1", optional = true } langchain-google-vertexai = { version = "0.1.0", optional = true } langchain-nvidia-ai-endpoints = { version = "0.0.3", optional = true } colbert-ai = { version = "0.2.19", optional = true } @@ -33,6 +53,11 @@ langchain-google = ["langchain-google-genai", "langchain-google-vertexai"] langchain-nvidia = ["langchain-nvidia-ai-endpoints"] colbert = ["colbert-ai", "pyarrow", "torch"] +llama-index-google = ["llama-index-llms-gemini", "llama-index-multi-modal-llms-gemini", "llama-index-llms-vertex", "llama-index-embeddings-gemini"] +llama-index-azure = ["llama-index-llms-azure-openai", "llama-index-embeddings-azure-openai"] +llama-index-bedrock = ["llama-index-llms-bedrock", "llama-index-embeddings-bedrock"] + + [tool.poetry.group.test.dependencies] pytest = "*" black = "*" diff --git a/ragstack-e2e-tests/pyproject.langchain.toml b/ragstack-e2e-tests/pyproject.langchain.toml index 7c7eefe54..916270263 100644 --- a/ragstack-e2e-tests/pyproject.langchain.toml +++ b/ragstack-e2e-tests/pyproject.langchain.toml @@ -13,7 +13,6 @@ pytest = "*" black = "*" ruff = "*" google-cloud-aiplatform = "^1.36.4" -boto3 = "^1.29.6" huggingface-hub = "^0.20.3" azure-storage-blob = "^12.19.0" pillow = "^10.2.0" @@ -32,15 +31,31 @@ langchain-core = { git = "https://github.com/langchain-ai/langchain.git", branch langchain-community = { git = "https://github.com/langchain-ai/langchain.git", branch = "master", subdirectory = "libs/community" } langchain-astradb = { git = "https://github.com/langchain-ai/langchain-datastax.git", branch = "main", subdirectory = "libs/astradb" } langchain-openai = { git = "https://github.com/langchain-ai/langchain.git", branch = "master", subdirectory = "libs/partners/openai" } -langchain-google-genai = { git = "https://github.com/langchain-ai/langchain-google.git", branch = "main", subdirectory = "libs/genai" } +# langchain-google-genai main branch depends on google-generativeai 0.5.x which is not supported by llama-index +#langchain-google-genai = { git = "https://github.com/langchain-ai/langchain-google.git", branch = "main", subdirectory = "libs/genai" } +langchain-google-genai = "1.0.1" langchain-google-vertexai = { git = "https://github.com/langchain-ai/langchain-google.git", branch = "main", subdirectory = "libs/vertexai" } langchain-nvidia-ai-endpoints = { git = "https://github.com/langchain-ai/langchain-nvidia.git", branch = "main", subdirectory = "libs/ai-endpoints" } -llama-index = { version = "0.9.48", extras = ["langchain"] } -llama-parse = { version = "0.1.4" } + +llama-index = "0.10.28" +llama-index-vector-stores-astra-db = "0.1.5" +llama-index-vector-stores-cassandra = "0.1.3" +llama-index-embeddings-langchain = "0.1.2" +llama-parse = "0.4.0" +llama-index-llms-azure-openai = { version = "0.1.5" } +llama-index-embeddings-azure-openai = { version = "0.1.6" } +llama-index-llms-bedrock = { version = "0.1.5" } +llama-index-embeddings-bedrock = { version = "0.1.4" } +llama-index-llms-gemini = { version = "0.1.7" } +llama-index-multi-modal-llms-gemini = { version = "0.1.5" } +llama-index-llms-vertex = { version = "0.1.5" } +llama-index-embeddings-gemini = { version = "0.1.6" } + +llama-index-llms-huggingface = "^0.1.0" + astrapy = "~0.7.0" -# we need this specific feature from cassio: https://github.com/CassioML/cassio/pull/128 cassio = "~0.1.4" -unstructured = { git = "https://github.com/Unstructured-IO/unstructured.git", branch = "main" } +unstructured = "0.12.5" [build-system] requires = ["poetry-core"] diff --git a/ragstack-e2e-tests/pyproject.llamaindex.toml b/ragstack-e2e-tests/pyproject.llamaindex.toml index 969e076e8..05b9e5ef2 100644 --- a/ragstack-e2e-tests/pyproject.llamaindex.toml +++ b/ragstack-e2e-tests/pyproject.llamaindex.toml @@ -12,8 +12,6 @@ python = ">=3.9,<3.12,!=3.9.7" pytest = "*" black = "*" ruff = "*" -google-cloud-aiplatform = "^1.36.4" -boto3 = "^1.29.6" huggingface-hub = "^0.20.3" azure-storage-blob = "^12.19.0" pillow = "^10.2.0" @@ -26,6 +24,7 @@ nemoguardrails = "^0.8.0" beautifulsoup4 = "^4" llama-index = { git = "https://github.com/run-llama/llama_index.git", branch = "main" } +llama-index-readers-llama-parse = { git = "https://github.com/run-llama/llama_index.git", branch = "main", subdirectory = "llama-index-integrations/readers/llama-index-readers-llama-parse" } llama-index-embeddings-langchain = { git = "https://github.com/run-llama/llama_index.git", branch = "main", subdirectory = "llama-index-integrations/embeddings/llama-index-embeddings-langchain" } llama-index-vector-stores-astra-db = { git = "https://github.com/run-llama/llama_index.git", branch = "main", subdirectory = "llama-index-integrations/vector_stores/llama-index-vector-stores-astra-db" } llama-index-vector-stores-cassandra = { git = "https://github.com/run-llama/llama_index.git", branch = "main", subdirectory = "llama-index-integrations/vector_stores/llama-index-vector-stores-cassandra" } @@ -47,7 +46,7 @@ langchain-astradb = "0.1.0" langchain-core = "0.1.31" langchain-community = "0.0.28" langchain-openai = "0.0.8" -langchain-google-genai = "0.0.9" +langchain-google-genai = "1.0.1" langchain-google-vertexai = "0.1.0" langchain-nvidia-ai-endpoints = "0.0.3" astrapy = "~0.7.0" diff --git a/ragstack-e2e-tests/pyproject.toml b/ragstack-e2e-tests/pyproject.toml index 6b7dda0a8..39314a7f7 100644 --- a/ragstack-e2e-tests/pyproject.toml +++ b/ragstack-e2e-tests/pyproject.toml @@ -13,8 +13,8 @@ python = ">=3.9,<3.12,!=3.9.7" pytest = "*" black = "*" ruff = "*" -boto3 = "^1.34.21" -huggingface-hub = "^0.20.3" +llama-index-embeddings-huggingface = "^0.1.0" +llama-index-llms-huggingface = "^0.1.0" azure-storage-blob = "^12.19.0" pillow = "^10.2.0" testcontainers = "^3.7.1" @@ -25,12 +25,7 @@ nemoguardrails = "^0.8.0" # From LangChain optional deps, needed by WebBaseLoader beautifulsoup4 = "^4" -ragstack-ai = { path = "../", develop = false, extras = [ - "langchain-google", - "langchain-nvidia", -] } -# we need this specific feature from cassio: https://github.com/CassioML/cassio/pull/128 -cassio = "~0.1.4" +ragstack-ai = { path = "../", develop = false, extras = ["langchain-google", "langchain-nvidia", "llama-index-google", "llama-index-azure", "llama-index-bedrock"]} # benchmarks pyperf = "^2.6.2" diff --git a/tests/unit_tests/test_ragstack.py b/tests/unit_tests/test_ragstack.py index a28cbf2e0..3b583e025 100644 --- a/tests/unit_tests/test_ragstack.py +++ b/tests/unit_tests/test_ragstack.py @@ -1,6 +1,6 @@ def test_import(): - from llama_index.vector_stores import AstraDBVectorStore # noqa - from llama_index.vector_stores import CassandraVectorStore # noqa + from llama_index.vector_stores.astra_db import AstraDBVectorStore # noqa + from llama_index.vector_stores.cassandra import CassandraVectorStore # noqa from langchain.vectorstores import AstraDB # noqa from langchain_astradb import AstraDBVectorStore # noqa import langchain_core # noqa