diff --git a/app.py b/app.py index c5c522d..d367841 100644 --- a/app.py +++ b/app.py @@ -118,7 +118,11 @@ def log_info_about_onnx(onnx_runtime: bool): cuda_support = True cuda_core = os.getenv("CUDA_CORE") if cuda_core is None or cuda_core == "": - if use_sentence_transformers_vectorizer and use_sentence_transformers_multi_process and torch.cuda.is_available(): + if ( + use_sentence_transformers_vectorizer + and use_sentence_transformers_multi_process + and torch.cuda.is_available() + ): available_workers = torch.cuda.device_count() cuda_core = ",".join([f"cuda:{i}" for i in range(available_workers)]) else: @@ -127,8 +131,6 @@ def log_info_about_onnx(onnx_runtime: bool): else: logger.info("Running on CPU") - - # Batch text tokenization enabled by default direct_tokenize = get_t2v_transformers_direct_tokenize() diff --git a/cicd/test.sh b/cicd/test.sh index adfc8e6..5f4597b 100755 --- a/cicd/test.sh +++ b/cicd/test.sh @@ -14,6 +14,14 @@ python3 smoke_auth_test.py docker stop $container_id +echo "Running tests with enabled cache" + +container_id=$(docker run -d -it -e ENABLE_CACHE='1' -p "8000:8080" "$local_repo") + +python3 smoke_validate_cache_test.py + +docker stop $container_id + echo "Running tests without authorization" container_id=$(docker run -d -it -p "8000:8080" "$local_repo") diff --git a/smoke_validate_cache_test.py b/smoke_validate_cache_test.py index ee3c513..206ecb4 100755 --- a/smoke_validate_cache_test.py +++ b/smoke_validate_cache_test.py @@ -48,6 +48,17 @@ def _test_vectorizing_sentences(self): self._try_to_vectorize(self.url + "/vectors/", sentence) self._try_to_vectorize(self.url + "/vectors", sentence) + def test_vectorize_payload_with_config(self): + weaviate_facts = [ + "Vector database for semantic search.", + "Supports similarity-based queries.", + "Integrates with ML for classification.", + ] + for _ in range(10): + for fact in weaviate_facts: + self._try_to_vectorize(self.url + "/vectors/", fact, "query") + self._try_to_vectorize(self.url + "/vectors", fact, "passage") + def test_vectorizing_cached_results(self): start = time.time() before = {} diff --git a/vectorizer.py b/vectorizer.py index e985d9c..4ae7466 100644 --- a/vectorizer.py +++ b/vectorizer.py @@ -34,11 +34,30 @@ class VectorInputConfig(BaseModel): pooling_strategy: Optional[str] = None task_type: Optional[str] = None + def __hash__(self): + return hash((self.pooling_strategy, self.task_type)) + + def __eq__(self, other): + if isinstance(other, VectorInputConfig): + return ( + self.pooling_strategy == other.pooling_strategy + and self.task_type == other.task_type + ) + return False + class VectorInput(BaseModel): text: str config: Optional[VectorInputConfig] = None + def __hash__(self): + return hash((self.text, self.config)) + + def __eq__(self, other): + if isinstance(other, VectorInput): + return self.text == other.text and self.config == other.config + return False + class Vectorizer: executor: ThreadPoolExecutor