Skip to content

Commit

Permalink
refactor: Refactor Weaviate tests (#3541)
Browse files Browse the repository at this point in the history
* refactor tests

* fix job

* revert

* revert

* revert

* use latest weaviate

* fix abstract methods signatures

* pass class_name to all the CRUD methods

* finish moving all the tests

* bump weaviate version

* raise, don't pass
  • Loading branch information
masci authored Nov 14, 2022
1 parent da6b0dc commit 4dfddf0
Show file tree
Hide file tree
Showing 7 changed files with 282 additions and 251 deletions.
110 changes: 38 additions & 72 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,44 @@ jobs:
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'


# Integration tests for the Weaviate document store.
# Waits for the unit-tests job, starts Weaviate as a service container and runs
# the tests in test/document_stores/test_weaviate.py that carry both the
# `document_store` and `integration` markers.
integration-tests-weaviate:
  name: Integration / Weaviate / ${{ matrix.os }}
  needs:
    - unit-tests
  strategy:
    # Let every matrix entry finish even if one of them fails.
    fail-fast: false
    matrix:
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  services:
    weaviate:
      image: semitechnologies/weaviate:1.16.0
      env:
        # Environment values are passed to the container as strings, so every
        # value is quoted to keep the YAML type explicit and consistent.
        AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
        PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
        ENABLE_EXPERIMENTAL_BM25: "true"
        DISK_USE_READONLY_PERCENTAGE: "95"
      ports:
        # Quoted so the colon-separated mapping is always read as a string.
        - "8080:8080"
  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    - name: Install Haystack
      run: pip install -U .[docstores]

    - name: Run tests
      run: |
        pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_weaviate.py

    # Report to Slack only when the job failed, the repository owner is
    # deepset-ai and the ref is the main branch.
    - uses: act10ns/slack@v1
      with:
        status: ${{ job.status }}
        channel: '#haystack'
      if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

#
# TODO: the following steps need to be revisited
#
Expand Down Expand Up @@ -502,78 +540,6 @@ jobs:
# pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus


# Weaviate job that starts the server with `docker run` and runs the tests
# marked `weaviate and not integration` under test/document_stores/.
# NOTE(review): this hunk appears to be the removed side of the commit diff
# (indentation is not preserved here) — confirm before reusing it.
weaviate-tests-linux:
needs: [mypy, pylint, black]
runs-on: ubuntu-latest
# Runs when the pull request carries the 'topic:weaviate' label or is not a draft.
if: contains(github.event.pull_request.labels.*.name, 'topic:weaviate') || !github.event.pull_request.draft

steps:
- uses: actions/checkout@v3

- name: Setup Python
uses: ./.github/actions/python_cache/

# Starts a detached Weaviate 1.14.1 container named haystack_test_weaviate,
# published on port 8080, with anonymous access and experimental BM25 enabled.
- name: Setup Weaviate
run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' --env DISK_USE_READONLY_PERCENTAGE='95' semitechnologies/weaviate:1.14.1

# TODO Let's try to remove this one from the unit tests
# NOTE(review): --no-check-certificate disables TLS verification for an archive
# whose pdftotext binary is then copied into /usr/local/bin with sudo —
# consider dropping the flag or verifying a checksum.
- name: Install pdftotext
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

- name: Install Haystack
run: pip install .[weaviate]

# PYTEST_PARAMS is read from the `env` context; its definition is not visible
# in this excerpt — presumably workflow-level env, TODO confirm.
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" test/document_stores/ --document_store_type=weaviate
- name: Dump docker logs on failure
if: failure()
uses: jwalton/gh-docker-logs@v1

# Report to Slack only when the job failed, the repository owner is deepset-ai
# and the ref is the main branch.
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

# FIXME: seems like we can't run containers on Windows
# weaviate-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:weaviate') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

# steps:
# - uses: actions/checkout@v3

# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows

# - name: Setup Weaviate
# run: docker run -d -p 8080:8080 --name haystack_test_weaviate --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --env ENABLE_EXPERIMENTAL_BM25='true' --env DISK_USE_READONLY_PERCENTAGE='95' semitechnologies/weaviate:1.14.1

# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv

# - name: Install Haystack
# run: pip install .[weaviate]

# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate


pinecone-tests-linux:
needs: [mypy, pylint, black]
runs-on: ubuntu-latest
Expand Down
36 changes: 26 additions & 10 deletions haystack/document_stores/weaviate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

_optional_component_not_installed(__name__, "weaviate", ie)

from haystack.schema import Document
from haystack.schema import Document, Label
from haystack.document_stores import BaseDocumentStore
from haystack.document_stores.base import get_batches_from_generator
from haystack.document_stores.filter_utils import LogicalFilterClause
Expand Down Expand Up @@ -312,7 +312,7 @@ def get_document_by_id(
id = self._sanitize_id(id=id, index=index)
result = None
try:
result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
result = self.weaviate_client.data_object.get_by_id(id, class_name=index, with_vector=True)
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
logging.debug("Weaviate could not get the document requested: %s", usce)
if result:
Expand All @@ -339,7 +339,7 @@ def get_documents_by_id(
id = self._sanitize_id(id=id, index=index)
result = None
try:
result = self.weaviate_client.data_object.get_by_id(id, with_vector=True)
result = self.weaviate_client.data_object.get_by_id(id, class_name=index, with_vector=True)
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
logging.debug("Weaviate could not get the document requested: %s", usce)
if result:
Expand Down Expand Up @@ -1352,15 +1352,15 @@ def delete_documents(

if ids and not filters:
for id in ids:
self.weaviate_client.data_object.delete(id)
self.weaviate_client.data_object.delete(id, class_name=index)

else:
# Use filters to restrict list of retrieved documents, before checking these against provided ids
docs_to_delete = self.get_all_documents(index, filters=filters)
if ids:
docs_to_delete = [doc for doc in docs_to_delete if doc.id in ids]
for doc in docs_to_delete:
self.weaviate_client.data_object.delete(doc.id)
self.weaviate_client.data_object.delete(doc.id, class_name=index)

def delete_index(self, index: str):
"""
Expand All @@ -1382,34 +1382,50 @@ def _delete_index(self, index: str):
self.weaviate_client.schema.delete_class(index)
logger.info("Index '%s' deleted.", index)

def delete_labels(self):
def delete_labels(
self,
index: Optional[str] = None,
ids: Optional[List[str]] = None,
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
):
"""
Implemented to respect BaseDocumentStore's contract.
Weaviate does not support labels (yet).
"""
raise NotImplementedError("Weaviate does not support labels (yet).")

def get_all_labels(self):
def get_all_labels(
self,
index: Optional[str] = None,
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
) -> List[Label]:
"""
Implemented to respect BaseDocumentStore's contract.
Weaviate does not support labels (yet).
"""
raise NotImplementedError("Weaviate does not support labels (yet).")

def get_label_count(self):
def get_label_count(self, index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int:
"""
Implemented to respect BaseDocumentStore's contract.
Weaviate does not support labels (yet).
"""
raise NotImplementedError("Weaviate does not support labels (yet).")

def write_labels(self):
def write_labels(
self,
labels: Union[List[Label], List[dict]],
index: Optional[str] = None,
headers: Optional[Dict[str, str]] = None,
):
"""
Implemented to respect BaseDocumentStore's contract.
Weaviate does not support labels (yet).
"""
pass
raise NotImplementedError("Weaviate does not support labels (yet).")
2 changes: 1 addition & 1 deletion haystack/utils/doc_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def launch_weaviate(sleep=15):
logger.debug("Starting Weaviate ...")
status = subprocess.run(
[
f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:1.14.0"
f"docker start {WEAVIATE_CONTAINER_NAME} > /dev/null 2>&1 || docker run -d -p 8080:8080 --env AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true' --env PERSISTENCE_DATA_PATH='/var/lib/weaviate' --name {WEAVIATE_CONTAINER_NAME} semitechnologies/weaviate:latest"
],
shell=True,
)
Expand Down
7 changes: 2 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ milvus = [
"farm-haystack[sql,only-milvus]",
]
weaviate = [
"weaviate-client==3.6.0",
"weaviate-client==3.9.0",
]
only-pinecone = [
"pinecone-client>=2.0.11,<3",
Expand Down Expand Up @@ -314,9 +314,6 @@ disable = [
"simplifiable-if-expression",
"use-list-literal",




# To review later
"cyclic-import",
"import-outside-toplevel",
Expand All @@ -334,7 +331,7 @@ addopts = "--strict-markers"
markers = [
"unit: unit tests",
"integration: integration tests",

"generator: generator tests",
"summarizer: summarizer tests",
"embedding_dim: uses a document store with non-default embedding dimension (e.g @pytest.mark.embedding_dim(128))",
Expand Down
2 changes: 1 addition & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def weaviate_fixture():
print("Starting Weaviate servers ...")
status = subprocess.run(["docker rm haystack_test_weaviate"], shell=True)
status = subprocess.run(
["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:1.14.1"], shell=True
["docker run -d --name haystack_test_weaviate -p 8080:8080 semitechnologies/weaviate:latest"], shell=True
)
if status.returncode:
raise Exception("Failed to launch Weaviate. Please check docker container logs.")
Expand Down
8 changes: 4 additions & 4 deletions test/document_stores/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,13 @@ def test_nin_filters(self, ds, documents):
def test_comparison_filters(self, ds, documents):
ds.write_documents(documents)

result = ds.get_all_documents(filters={"numbers": {"$gt": 0}})
result = ds.get_all_documents(filters={"numbers": {"$gt": 0.0}})
assert len(result) == 3

result = ds.get_all_documents(filters={"numbers": {"$gte": -2}})
result = ds.get_all_documents(filters={"numbers": {"$gte": -2.0}})
assert len(result) == 6

result = ds.get_all_documents(filters={"numbers": {"$lt": 0}})
result = ds.get_all_documents(filters={"numbers": {"$lt": 0.0}})
assert len(result) == 3

result = ds.get_all_documents(filters={"numbers": {"$lte": 2.0}})
Expand Down Expand Up @@ -297,7 +297,7 @@ def test_get_documents_by_id(self, ds, documents):
@pytest.mark.integration
def test_get_document_count(self, ds, documents):
ds.write_documents(documents)
assert ds.get_document_count() == 9
assert ds.get_document_count() == len(documents)
assert ds.get_document_count(filters={"year": ["2020"]}) == 3
assert ds.get_document_count(filters={"month": ["02"]}) == 3

Expand Down
Loading

0 comments on commit 4dfddf0

Please sign in to comment.