Skip to content

Commit

Permalink
Merge pull request #26 from hawkeye217/jina-models
Browse files Browse the repository at this point in the history
Jina models
  • Loading branch information
hawkeye217 authored Oct 9, 2024
2 parents 9d7208c + beae4b9 commit 98876a4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
13 changes: 12 additions & 1 deletion frigate/embeddings/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(self, db: SqliteVecQueueDatabase) -> None:
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
"jinaai/jina-clip-v1-feature_extractor.json",
"jinaai/jina-clip-v1-feature_extractor",
]

for model in models:
Expand Down Expand Up @@ -150,6 +150,14 @@ def _create_tables(self):
);
""")

def _drop_tables(self):
self.db.execute_sql("""
DROP TABLE vec_descriptions;
""")
self.db.execute_sql("""
DROP TABLE vec_thumbnails;
""")

def upsert_thumbnail(self, event_id: str, thumbnail: bytes):
# Convert thumbnail bytes to PIL Image
image = Image.open(io.BytesIO(thumbnail)).convert("RGB")
Expand Down Expand Up @@ -281,6 +289,9 @@ def search_description(
def reindex(self) -> None:
logger.info("Indexing event embeddings...")

self._drop_tables()
self._create_tables()

st = time.time()
totals = {
"thumb": 0,
Expand Down
11 changes: 9 additions & 2 deletions frigate/embeddings/functions/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _download_model(self, path: str):
logger.info(f"Downloading {self.model_name} tokenizer")
tokenizer = AutoTokenizer.from_pretrained(
self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer",
clean_up_tokenization_spaces=True,
)
Expand All @@ -84,6 +85,7 @@ def _download_model(self, path: str):
logger.info(f"Downloading {self.model_name} feature extractor")
feature_extractor = AutoFeatureExtractor.from_pretrained(
self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor",
)
feature_extractor.save_pretrained(path)
Expand Down Expand Up @@ -119,14 +121,19 @@ def _load_model_and_tokenizer(self):
def _load_tokenizer(self):
    """Return the tokenizer for this model, cached locally.

    Files are cached under MODEL_CACHE_DIR/<model_name>/tokenizer
    (populated by _download_model).
    """
    cache_path = os.path.join(f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer")
    # trust_remote_code lets transformers run the custom tokenizer code
    # some repos (e.g. jina models) ship alongside their weights.
    load_kwargs = {
        "cache_dir": cache_path,
        "trust_remote_code": True,
        "clean_up_tokenization_spaces": True,
    }
    return AutoTokenizer.from_pretrained(self.model_name, **load_kwargs)

def _load_feature_extractor(self):
    """Return the feature extractor for this model, cached locally.

    Files are cached under MODEL_CACHE_DIR/<model_name>/feature_extractor
    (populated by _download_model).
    """
    cache_path = os.path.join(
        f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor"
    )
    # trust_remote_code is needed for repos that define a custom
    # feature-extractor class in their own code.
    return AutoFeatureExtractor.from_pretrained(
        self.model_name,
        trust_remote_code=True,
        cache_dir=cache_path,
    )

def _load_model(self, path: str, providers: List[str]):
if os.path.exists(path):
Expand Down

0 comments on commit 98876a4

Please sign in to comment.