Merge pull request #106 from weaviate/trengrj/model2vec
model2vec support
antas-marcin authored Feb 1, 2025
2 parents a22f5d7 + fc8db4b commit e6cd4d8
Showing 5 changed files with 42 additions and 14 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/main.yaml
@@ -113,6 +113,18 @@ jobs:
           - model_name: Snowflake/snowflake-arctic-embed-s
             model_tag_name: snowflake-snowflake-arctic-embed-s
             onnx_runtime: false
+          - model_name: minishlab/potion-base-32M
+            model_tag_name: minishlab/potion-base-32M
+            use_sentence_transformers_vectorizer: true
+            onnx_runtime: false
+          - model_name: minishlab/potion-base-8M
+            model_tag_name: minishlab/potion-base-8M
+            use_sentence_transformers_vectorizer: true
+            onnx_runtime: false
+          - model_name: minishlab/potion-base-4M
+            model_tag_name: minishlab/potion-base-4M
+            use_sentence_transformers_vectorizer: true
+            onnx_runtime: false
           - model_name: Snowflake/snowflake-arctic-embed-s
             model_tag_name: snowflake-snowflake-arctic-embed-s
             onnx_runtime: true
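The three potion checkpoints added to the build matrix above are Model2Vec static-embedding models, so they are built with `use_sentence_transformers_vectorizer: true` and load through sentence-transformers rather than AutoModel. A minimal sketch of exercising one of them (not part of this commit; the model choice and device are illustrative):

```python
# Illustrative only: load a Model2Vec potion checkpoint via sentence-transformers on CPU.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("minishlab/potion-base-8M", device="cpu")
embeddings = model.encode(["Weaviate is a vector database."])
print(embeddings.shape)  # (1, <embedding dimension of the checkpoint>)
```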
5 changes: 5 additions & 0 deletions app.py
@@ -142,6 +142,11 @@ def log_info_about_onnx(onnx_runtime: bool):
         use_sentence_transformers_vectorizer,
         trust_remote_code,
     )
+
+    if cuda_support is False and meta_config.get_model_type() == "model2vec":
+        # in case of CPU we need to run this model explicitly on CPU device, not MPS device
+        cuda_core = "cpu"
+
     vec = Vectorizer(
         model_dir,
         cuda_support,
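Why the override above matters: when no CUDA device is present, sentence-transformers' default device selection can fall back to MPS on Apple Silicon, which the comment in the diff guards against for the static model2vec models. A hedged sketch of the decision (the helper name is made up; the actual wiring of `cuda_core` into `Vectorizer` is not shown in this diff):

```python
# Illustrative helper only; it mirrors the override in app.py above, nothing more.
def resolve_device(cuda_support: bool, cuda_core: str, model_type: str) -> str:
    if not cuda_support and model_type == "model2vec":
        # force plain CPU so torch/sentence-transformers does not pick MPS
        return "cpu"
    return cuda_core


print(resolve_device(False, "cuda:0", "model2vec"))  # -> cpu
```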
19 changes: 15 additions & 4 deletions download.py
@@ -114,17 +114,28 @@ def save_model_config(model_config):
     print(
         f"Downloading model {model_name} from huggingface model hub ({trust_remote_code=})"
     )
-    config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code)
-    model_type = config.to_dict()["model_type"]
+    try:
+        config = AutoConfig.from_pretrained(
+            model_name, trust_remote_code=trust_remote_code
+        )
+        config_dict = config.to_dict()
+        model_type = config.to_dict()["model_type"]
+    except ValueError as e:
+        # Check if this is the specific model2vec error
+        if "model type `model2vec`" in str(e):
+            config_dict = {"model_type": "model2vec"}
+            model_type = "model2vec"
+        else:
+            raise e
 
     if (
-        model_type is not None and model_type == "t5"
+        model_type is not None and (model_type == "t5" or model_type == "model2vec")
     ) or use_sentence_transformers_vectorizer.lower() == "true":
         SentenceTransformer(
             model_name, cache_folder=model_dir, trust_remote_code=trust_remote_code
         )
         save_model_name(model_name)
-        save_model_config(config.to_dict())
+        save_model_config(config_dict)
     else:
         if config.architectures and not force_automodel:
             print(f"Using class {config.architectures[0]} to load model weights")
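The try/except added above exists because transformers has no registered `model2vec` architecture: `AutoConfig.from_pretrained` raises a ValueError for such repos, so the code substitutes a stub config and fetches the weights through sentence-transformers instead. A standalone sketch of the same flow (the model name and cache folder are illustrative, not taken from this repo's defaults):

```python
# Illustrative sketch of the fallback path added in download.py above.
from sentence_transformers import SentenceTransformer
from transformers import AutoConfig

model_name = "minishlab/potion-base-8M"  # example model2vec checkpoint
try:
    config_dict = AutoConfig.from_pretrained(model_name).to_dict()
except ValueError as e:
    # transformers cannot map the `model2vec` model type to a config class
    if "model type `model2vec`" in str(e):
        config_dict = {"model_type": "model2vec"}
    else:
        raise

if config_dict["model_type"] in ("t5", "model2vec"):
    # model2vec (and t5) checkpoints are downloaded via sentence-transformers
    SentenceTransformer(model_name, cache_folder="./models")  # cache path assumed
```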
10 changes: 5 additions & 5 deletions requirements-test.txt
@@ -1,11 +1,11 @@
-transformers==4.47.1
-fastapi==0.115.6
+transformers==4.48.2
+fastapi==0.115.8
 uvicorn==0.34.0
 nltk==3.9.1
-torch==2.5.1
+torch==2.6.0
 sentencepiece==0.2.0
-sentence-transformers==3.1.1
-optimum==1.23.3
+sentence-transformers==3.4.1
+optimum==1.24.0
 onnxruntime==1.20.1
 onnx==1.17.0
 numpy==1.26.4
10 changes: 5 additions & 5 deletions requirements.txt
@@ -1,11 +1,11 @@
-transformers==4.47.1
-fastapi==0.115.6
+transformers==4.48.2
+fastapi==0.115.8
 uvicorn==0.34.0
 nltk==3.9.1
-torch==2.5.1
+torch==2.6.0
 sentencepiece==0.2.0
-sentence-transformers==3.1.1
-optimum==1.23.3
+sentence-transformers==3.4.1
+optimum==1.24.0
 onnxruntime==1.20.1
 onnx==1.17.0
 numpy==1.26.4