From 6670831259932c26b08a5ec01dae7367783ceca2 Mon Sep 17 00:00:00 2001 From: Sara Zan Date: Thu, 14 Jul 2022 19:03:33 +0100 Subject: [PATCH] Mocking Pinecone tests (#2778) * Integrating the mock into conftest.py * re-enable workflow * delete_all * Update Documentation & Code Style * remove ValueError * Add empty response * wrong condition * return response * revert removal of delete_all * change mock * Update Documentation & Code Style * test for rest api, to revert Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .github/workflows/tests.yml | 104 +++++++++---------- test/conftest.py | 51 +++++++-- test/mocks/{pinecone_mock.py => pinecone.py} | 20 +++- 3 files changed, 109 insertions(+), 66 deletions(-) rename test/mocks/{pinecone_mock.py => pinecone.py} (90%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index edbc898760..53a9107c87 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -428,68 +428,64 @@ jobs: # pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate - # FIXME: This block should be uncommented as soon as Pinecone tests are fixed - # as part of the fixes discussed in #2644. - # Check locally for these tests to pass before uncommenting. - # - # pinecone-tests-linux: - # needs: - # - mypy - # - pylint - # runs-on: ubuntu-latest - # if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft + pinecone-tests-linux: + needs: + - mypy + - pylint + runs-on: ubuntu-latest + if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft - # steps: - # - uses: actions/checkout@v2 + steps: + - uses: actions/checkout@v2 - # - name: Setup Python - # uses: ./.github/actions/python_cache/ + - name: Setup Python + uses: ./.github/actions/python_cache/ - # # TODO Let's try to remove this one from the unit tests - # - name: Install pdftotext - # run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin + # TODO Let's try to remove this one from the unit tests + - name: Install pdftotext + run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin - # - name: Install Haystack - # run: pip install .[pinecone] + - name: Install Haystack + run: pip install .[pinecone] - # - name: Run tests - # env: - # PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} - # TOKENIZERS_PARALLELISM: 'false' - # run: | - # pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone + - name: Run tests + env: + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + TOKENIZERS_PARALLELISM: 'false' + run: | + pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone - # FIXME very slow and very little platform dependency, so to evaluate - # pinecone-tests-windows: - # needs: - # - mypy - # - pylint - # runs-on: windows-latest - # if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft - # steps: - # - uses: actions/checkout@v2 + pinecone-tests-windows: + needs: + - mypy + - pylint + runs-on: windows-latest + if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft - # - name: Setup Python - # uses: ./.github/actions/python_cache/ - # with: - # prefix: windows + steps: + - uses: actions/checkout@v2 - # - name: Install pdftotext - # run: | - # choco install xpdf-utils - # choco install openjdk11 - # refreshenv + - name: Setup Python + uses: ./.github/actions/python_cache/ + with: + prefix: windows - # - name: Install Haystack - # run: pip install .[pinecone] + - name: Install pdftotext + run: | + choco install xpdf-utils + choco install openjdk11 + refreshenv - # - name: Run tests - # env: - # TOKENIZERS_PARALLELISM: 'false' - # PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} - # run: | - # pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone + - name: Install Haystack + run: pip install .[pinecone] + + - name: Run tests + env: + TOKENIZERS_PARALLELISM: 'false' + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + run: | + pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone rest-and-ui: needs: @@ -510,9 +506,9 @@ jobs: - name: Install REST API and UI run: | - pip install rest_api/ - pip install ui/ - + pip install -U rest_api/ + pip install -U ui/ + - name: Run tests run: | pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/ diff --git a/test/conftest.py b/test/conftest.py index ebc7069645..451f3ee59b 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -72,6 +72,8 @@ from haystack.schema import Document +from .mocks import pinecone as pinecone_mock + # To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below SQL_TYPE = "sqlite" @@ -159,9 +161,9 @@ def pytest_collection_modifyitems(config, items): item.add_marker(skip_milvus) # Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables - if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords: - skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.") - item.add_marker(skip_pinecone) + # if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords: + # skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.") + # item.add_marker(skip_pinecone) # @@ -742,8 +744,22 @@ def ensure_ids_are_correct_uuids(docs: list, document_store: object) -> None: d["id"] = str(uuid.uuid4()) +# FIXME Fix this in the docstore tests refactoring +from inspect import getmembers, isclass, isfunction + + +def mock_pinecone(monkeypatch): + for fname, function in getmembers(pinecone_mock, isfunction): + monkeypatch.setattr(f"pinecone.{fname}", function, raising=False) + for cname, class_ in getmembers(pinecone_mock, isclass): + monkeypatch.setattr(f"pinecone.{cname}", class_, raising=False) + + @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"]) -def document_store_with_docs(request, docs, tmp_path): +def document_store_with_docs(request, docs, tmp_path, monkeypatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) document_store = get_document_store( document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path @@ -754,7 +770,10 @@ def document_store_with_docs(request, docs, tmp_path): @pytest.fixture -def document_store(request, tmp_path): +def document_store(request, tmp_path, monkeypatch: pytest.MonkeyPatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) document_store = get_document_store( document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path @@ -764,7 +783,10 @@ def document_store(request, tmp_path): @pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"]) -def document_store_dot_product(request, tmp_path): +def document_store_dot_product(request, tmp_path, monkeypatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) document_store = get_document_store( document_store_type=request.param, @@ -777,7 +799,10 @@ def document_store_dot_product(request, tmp_path): @pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"]) -def document_store_dot_product_with_docs(request, docs, tmp_path): +def document_store_dot_product_with_docs(request, docs, tmp_path, monkeypatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768)) document_store = get_document_store( document_store_type=request.param, @@ -791,7 +816,10 @@ def document_store_dot_product_with_docs(request, docs, tmp_path): @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "pinecone"]) -def document_store_dot_product_small(request, tmp_path): +def document_store_dot_product_small(request, tmp_path, monkeypatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3)) document_store = get_document_store( document_store_type=request.param, @@ -804,7 +832,10 @@ def document_store_dot_product_small(request, tmp_path): @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"]) -def document_store_small(request, tmp_path): +def document_store_small(request, tmp_path, monkeypatch): + if request.param == "pinecone": + mock_pinecone(monkeypatch) + embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3)) document_store = get_document_store( document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path @@ -931,7 +962,7 @@ def get_document_store( elif document_store_type == "pinecone": document_store = PineconeDocumentStore( - api_key=os.environ["PINECONE_API_KEY"], + api_key=os.environ.get("PINECONE_API_KEY"), embedding_dim=embedding_dim, embedding_field=embedding_field, index=index, diff --git a/test/mocks/pinecone_mock.py b/test/mocks/pinecone.py similarity index 90% rename from test/mocks/pinecone_mock.py rename to test/mocks/pinecone.py index 859f68661c..4aef3ccd18 100644 --- a/test/mocks/pinecone_mock.py +++ b/test/mocks/pinecone.py @@ -1,5 +1,10 @@ from typing import Optional, List +import logging + +logger = logging.getLogger(__name__) + + # Mock Pinecone instance CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}} @@ -87,7 +92,9 @@ def query( def fetch(self, ids: List[str], namespace: str = ""): response: dict = {"namespace": namespace, "vectors": {}} if namespace not in self.index_config.namespaces: - raise ValueError("Namespace not found") + # If we query an empty/non-existent namespace, Pinecone will just return an empty response + logger.warning(f"No namespace called '{namespace}'") + return response records = self.index_config.namespaces[namespace] for record in records: if record["id"] in ids.copy(): @@ -98,7 +105,16 @@ def fetch(self, ids: List[str], namespace: str = ""): } return response - def delete(self, ids: Optional[List[str]] = None, namespace: str = "", filters: Optional[dict] = None): + def delete( + self, + ids: Optional[List[str]] = None, + namespace: str = "", + filters: Optional[dict] = None, + delete_all: bool = False, + ): + if delete_all: + self.index_config.namespaces[namespace] = [] + if namespace not in self.index_config.namespaces: pass elif ids is not None: