From 12c3d329d622477f1e6869734706d7151eab50cb Mon Sep 17 00:00:00 2001
From: Jacob Marks
Date: Wed, 3 Jan 2024 18:37:19 -0500
Subject: [PATCH 1/2] adding embeddings to open_clip

---
 fiftyone/utils/open_clip.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/fiftyone/utils/open_clip.py b/fiftyone/utils/open_clip.py
index 15d4cab964..6361119cff 100644
--- a/fiftyone/utils/open_clip.py
+++ b/fiftyone/utils/open_clip.py
@@ -58,6 +58,32 @@ def __init__(self, config):
         super().__init__(config)
         self._text_features = None
 
+    @property
+    def can_embed_prompts(self):
+        return True
+
+    def embed_prompt(self, prompt):
+        """Generates an embedding for the given text prompt.
+
+        Args:
+            prompt: a text string
+
+        Returns:
+            a numpy vector
+        """
+        return self.embed_prompts([prompt])[0]
+
+    def embed_prompts(self, prompts):
+        """Generates embeddings for the given text prompts.
+
+        Args:
+            prompts: an iterable of text strings
+
+        Returns:
+            a ``num_prompts x num_dims`` array of prompt embeddings
+        """
+        return self._embed_prompts(prompts).detach().cpu().numpy()
+
     def _load_model(self, config):
         (
             self._model,
@@ -80,6 +106,14 @@ def _get_text_features(self):
 
         return self._text_features
 
+    def _embed_prompts(self, prompts):
+        formatted_prompts = [
+            "%s %s" % (self.config.text_prompt, p) for p in prompts
+        ]
+        # Tokenize text
+        text = self._tokenizer(formatted_prompts)
+        return self._model.encode_text(text)
+
     def _get_class_logits(self, text_features, image_features):
         # source: https://github.com/openai/CLIP/blob/main/README.md
         image_features = image_features / image_features.norm(

From 78a4463a7542452d75dd412b8ca52defc32a972f Mon Sep 17 00:00:00 2001
From: brimoor
Date: Wed, 3 Jan 2024 20:55:18 -0500
Subject: [PATCH 2/2] remove unnecessary manager

---
 fiftyone/zoo/models/manifest-torch.json | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fiftyone/zoo/models/manifest-torch.json b/fiftyone/zoo/models/manifest-torch.json
index 0bb1459f73..cf2251e9b2 100644
--- a/fiftyone/zoo/models/manifest-torch.json
+++ b/fiftyone/zoo/models/manifest-torch.json
@@ -1869,10 +1869,6 @@
             "description": "OPEN CLIP text/image encoder from `Learning Transferable Visual Models From Natural Language Supervision <https://arxiv.org/abs/2103.00020>`_ trained on 400M text-image pairs",
             "source": "https://github.com/mlfoundations/open_clip",
             "size_bytes": 353976522,
-            "manager": {
-                "type": "fiftyone.core.models.ModelManager",
-                "config": {}
-            },
             "default_deployment_config_dict": {
                 "type": "fiftyone.utils.open_clip.TorchOpenClipModel",
                 "config": {
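
Usage note: below is a minimal sketch of how the prompt-embedding methods added in PATCH 1/2 could be exercised once this lands. The zoo model name ``"open-clip-torch"`` is an assumption; check ``fiftyone.zoo.list_zoo_models()`` for the exact identifier in your install::

    import fiftyone.zoo as foz

    # Assumed zoo model name; verify with foz.list_zoo_models()
    model = foz.load_zoo_model("open-clip-torch")

    if model.can_embed_prompts:
        # Single prompt -> 1D numpy vector
        embedding = model.embed_prompt("a photo of a dog")

        # Batch of prompts -> num_prompts x num_dims numpy array
        embeddings = model.embed_prompts(
            ["a photo of a dog", "a photo of a cat"]
        )
        print(embedding.shape, embeddings.shape)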