Skip to content

Commit

Permalink
Mocking Pinecone tests (#2778)
Browse files Browse the repository at this point in the history
* Integrating the mock into conftest.py

* re-enable workflow

* delete_all

* Update Documentation & Code Style

* remove ValueError

* Add empty response

* wrong condition

* return response

* revert removal of delete_all

* change mock

* Update Documentation & Code Style

* test for rest api, to revert

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
ZanSara and github-actions[bot] authored Jul 14, 2022
1 parent e6d8bcd commit 6b39fbd
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 66 deletions.
104 changes: 50 additions & 54 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -428,68 +428,64 @@ jobs:
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate


# FIXME: This block should be uncommented as soon as Pinecone tests are fixed
# as part of the fixes discussed in #2644.
# Check locally for these tests to pass before uncommenting.
#
# pinecone-tests-linux:
# needs:
# - mypy
# - pylint
# runs-on: ubuntu-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft
pinecone-tests-linux:
needs:
- mypy
- pylint
runs-on: ubuntu-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') || !github.event.pull_request.draft

# steps:
# - uses: actions/checkout@v2
steps:
- uses: actions/checkout@v2

# - name: Setup Python
# uses: ./.github/actions/python_cache/
- name: Setup Python
uses: ./.github/actions/python_cache/

# # TODO Let's try to remove this one from the unit tests
# - name: Install pdftotext
# run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
# TODO Let's try to remove this one from the unit tests
- name: Install pdftotext
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

# - name: Install Haystack
# run: pip install .[pinecone]
- name: Install Haystack
run: pip install .[pinecone]

# - name: Run tests
# env:
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
- name: Run tests
env:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
# FIXME very slow and very little platform dependency, so to evaluate
# pinecone-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
# steps:
# - uses: actions/checkout@v2
pinecone-tests-windows:
needs:
- mypy
- pylint
runs-on: windows-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:pinecone') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows
steps:
- uses: actions/checkout@v2

# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv
- name: Setup Python
uses: ./.github/actions/python_cache/
with:
prefix: windows

# - name: Install Haystack
# run: pip install .[pinecone]
- name: Install pdftotext
run: |
choco install xpdf-utils
choco install openjdk11
refreshenv
# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
- name: Install Haystack
run: pip install .[pinecone]

- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
rest-and-ui:
needs:
Expand All @@ -510,9 +506,9 @@ jobs:

- name: Install REST API and UI
run: |
pip install rest_api/
pip install ui/
pip install -U rest_api/
pip install -U ui/
- name: Run tests
run: |
pytest ${{ env.PYTEST_PARAMS }} rest_api/ ui/
Expand Down
51 changes: 41 additions & 10 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@

from haystack.schema import Document

from .mocks import pinecone as pinecone_mock


# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
SQL_TYPE = "sqlite"
Expand Down Expand Up @@ -159,9 +161,9 @@ def pytest_collection_modifyitems(config, items):
item.add_marker(skip_milvus)

# Skip PineconeDocumentStore if PINECONE_API_KEY not in environment variables
if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
item.add_marker(skip_pinecone)
# if not os.environ.get("PINECONE_API_KEY", False) and "pinecone" in keywords:
# skip_pinecone = pytest.mark.skip(reason="PINECONE_API_KEY not in environment variables.")
# item.add_marker(skip_pinecone)


#
Expand Down Expand Up @@ -742,8 +744,22 @@ def ensure_ids_are_correct_uuids(docs: list, document_store: object) -> None:
d["id"] = str(uuid.uuid4())


# FIXME Fix this in the docstore tests refactoring
from inspect import getmembers, isclass, isfunction


def mock_pinecone(monkeypatch):
    """
    Point the `pinecone` package at the local mock implementation.

    Every function and every class exposed by `pinecone_mock` is installed under the
    same name on the `pinecone` module through `monkeypatch`. `raising=False` is passed
    so that names which do not exist on `pinecone` are patched in as well.

    :param monkeypatch: the pytest `monkeypatch` fixture used to apply the patches.
    """
    # Functions are patched first, then classes.
    for predicate in (isfunction, isclass):
        for member_name, member in getmembers(pinecone_mock, predicate):
            monkeypatch.setattr(f"pinecone.{member_name}", member, raising=False)


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_with_docs(request, docs, tmp_path):
def document_store_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
Expand All @@ -754,7 +770,10 @@ def document_store_with_docs(request, docs, tmp_path):


@pytest.fixture
def document_store(request, tmp_path):
def document_store(request, tmp_path, monkeypatch: pytest.MonkeyPatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path
Expand All @@ -764,7 +783,10 @@ def document_store(request, tmp_path):


@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product(request, tmp_path):
def document_store_dot_product(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -777,7 +799,10 @@ def document_store_dot_product(request, tmp_path):


@pytest.fixture(params=["memory", "faiss", "milvus1", "milvus", "elasticsearch", "pinecone"])
def document_store_dot_product_with_docs(request, docs, tmp_path):
def document_store_dot_product_with_docs(request, docs, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -791,7 +816,10 @@ def document_store_dot_product_with_docs(request, docs, tmp_path):


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "pinecone"])
def document_store_dot_product_small(request, tmp_path):
def document_store_dot_product_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store(
document_store_type=request.param,
Expand All @@ -804,7 +832,10 @@ def document_store_dot_product_small(request, tmp_path):


@pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus1", "milvus", "weaviate", "pinecone"])
def document_store_small(request, tmp_path):
def document_store_small(request, tmp_path, monkeypatch):
if request.param == "pinecone":
mock_pinecone(monkeypatch)

embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
document_store = get_document_store(
document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path
Expand Down Expand Up @@ -931,7 +962,7 @@ def get_document_store(

elif document_store_type == "pinecone":
document_store = PineconeDocumentStore(
api_key=os.environ["PINECONE_API_KEY"],
api_key=os.environ.get("PINECONE_API_KEY"),
embedding_dim=embedding_dim,
embedding_field=embedding_field,
index=index,
Expand Down
20 changes: 18 additions & 2 deletions test/mocks/pinecone_mock.py → test/mocks/pinecone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from typing import Optional, List

import logging

logger = logging.getLogger(__name__)


# Mock Pinecone instance
CONFIG: dict = {"api_key": None, "environment": None, "indexes": {}}

Expand Down Expand Up @@ -87,7 +92,9 @@ def query(
def fetch(self, ids: List[str], namespace: str = ""):
response: dict = {"namespace": namespace, "vectors": {}}
if namespace not in self.index_config.namespaces:
raise ValueError("Namespace not found")
# If we query an empty/non-existent namespace, Pinecone will just return an empty response
logger.warning(f"No namespace called '{namespace}'")
return response
records = self.index_config.namespaces[namespace]
for record in records:
if record["id"] in ids.copy():
Expand All @@ -98,7 +105,16 @@ def fetch(self, ids: List[str], namespace: str = ""):
}
return response

def delete(self, ids: Optional[List[str]] = None, namespace: str = "", filters: Optional[dict] = None):
def delete(
self,
ids: Optional[List[str]] = None,
namespace: str = "",
filters: Optional[dict] = None,
delete_all: bool = False,
):
if delete_all:
self.index_config.namespaces[namespace] = []

if namespace not in self.index_config.namespaces:
pass
elif ids is not None:
Expand Down

0 comments on commit 6b39fbd

Please sign in to comment.