Skip to content

Commit

Permalink
Merge pull request #26 from hawkeye217/jina-models
Browse files Browse the repository at this point in the history
Jina models
  • Loading branch information
hawkeye217 authored Oct 9, 2024
2 parents 9d7208c + beae4b9 commit 98876a4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
13 changes: 12 additions & 1 deletion frigate/embeddings/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(self, db: SqliteVecQueueDatabase) -> None:
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
"jinaai/jina-clip-v1-feature_extractor.json",
"jinaai/jina-clip-v1-feature_extractor",
]

for model in models:
Expand Down Expand Up @@ -150,6 +150,14 @@ def _create_tables(self):
);
""")

def _drop_tables(self):
self.db.execute_sql("""
DROP TABLE vec_descriptions;
""")
self.db.execute_sql("""
DROP TABLE vec_thumbnails;
""")

def upsert_thumbnail(self, event_id: str, thumbnail: bytes):
# Convert thumbnail bytes to PIL Image
image = Image.open(io.BytesIO(thumbnail)).convert("RGB")
Expand Down Expand Up @@ -281,6 +289,9 @@ def search_description(
def reindex(self) -> None:
logger.info("Indexing event embeddings...")

self._drop_tables()
self._create_tables()

st = time.time()
totals = {
"thumb": 0,
Expand Down
11 changes: 9 additions & 2 deletions frigate/embeddings/functions/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _download_model(self, path: str):
logger.info(f"Downloading {self.model_name} tokenizer")
tokenizer = AutoTokenizer.from_pretrained(
self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer",
clean_up_tokenization_spaces=True,
)
Expand All @@ -84,6 +85,7 @@ def _download_model(self, path: str):
logger.info(f"Downloading {self.model_name} feature extractor")
feature_extractor = AutoFeatureExtractor.from_pretrained(
self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor",
)
feature_extractor.save_pretrained(path)
Expand Down Expand Up @@ -119,14 +121,19 @@ def _load_model_and_tokenizer(self):
def _load_tokenizer(self):
    """Return the tokenizer for this model, cached locally.

    Files are cached under MODEL_CACHE_DIR/<model_name>/tokenizer
    (populated by _download_model).
    """
    cache_path = os.path.join(f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer")
    # trust_remote_code lets transformers run the custom tokenizer code
    # some repos (e.g. jina models) ship alongside their weights.
    load_kwargs = {
        "cache_dir": cache_path,
        "trust_remote_code": True,
        "clean_up_tokenization_spaces": True,
    }
    return AutoTokenizer.from_pretrained(self.model_name, **load_kwargs)

def _load_feature_extractor(self):
    """Return the feature extractor for this model, cached locally.

    Files are cached under MODEL_CACHE_DIR/<model_name>/feature_extractor
    (populated by _download_model).
    """
    cache_path = os.path.join(
        f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor"
    )
    # trust_remote_code is needed for repos that define a custom
    # feature-extractor class in their own code.
    return AutoFeatureExtractor.from_pretrained(
        self.model_name,
        trust_remote_code=True,
        cache_dir=cache_path,
    )

def _load_model(self, path: str, providers: List[str]):
if os.path.exists(path):
Expand Down

0 comments on commit 98876a4

Please sign in to comment.