Enable Sentence Transformer Inference with Intel Gaudi2 GPU Supported ('hpu') - Follow-up for #2557 #2608

Closed
15 changes: 12 additions & 3 deletions sentence_transformers/SentenceTransformer.py
@@ -20,6 +20,7 @@
 import math
 import queue
 import tempfile
+import copy

 from . import __MODEL_HUB_ORGANIZATION__
 from .evaluation import SentenceEvaluator
@@ -89,11 +90,13 @@ def __init__(
         token: Optional[Union[bool, str]] = None,
         use_auth_token: Optional[Union[bool, str]] = None,
         truncate_dim: Optional[int] = None,
+        padding: Union[str, bool] = True,
     ):
         # Note: self._load_sbert_model can also update `self.prompts` and `self.default_prompt_name`
         self.prompts = prompts or {}
         self.default_prompt_name = default_prompt_name
         self.truncate_dim = truncate_dim
+        self.padding = padding
         self._model_card_vars = {}
         self._model_card_text = None
         self._model_config = {}
@@ -315,6 +318,9 @@ def encode(
             ht.hpu.wrap_in_hpu_graph(self, disable_tensor_cache=True)
             self.is_hpu_graph_enabled = True

+        from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
+        adapt_transformers_to_gaudi()
+
         self.eval()
         if show_progress_bar is None:
             show_progress_bar = (
@@ -378,7 +384,11 @@ def encode(
             features.update(extra_features)

             with torch.no_grad():
-                out_features = self.forward(features)
+                if self.device.type == "hpu":
+                    hpu_graph_out = self.forward(features)
+                    out_features = copy.deepcopy(hpu_graph_out)
+                else:
+                    out_features = self.forward(features)
                 out_features["sentence_embedding"] = truncate_embeddings(
                     out_features["sentence_embedding"], self.truncate_dim
                 )
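Note on the hunk above: once the model is wrapped in an HPU graph, the tensors returned by forward() can live in buffers that the next graph replay overwrites, so the added copy.deepcopy snapshots each batch's outputs before they can be clobbered. A minimal sketch of the same pattern in isolation (the buffer-reuse rationale is our reading of the change, not stated in the diff):

import copy

def forward_with_hpu_copy(forward, features, device_type):
    # HPU graph mode may reuse output buffers across replays, so copy the
    # outputs out of the graph before the next batch runs; on other devices,
    # return the outputs unchanged.
    out = forward(features)
    return copy.deepcopy(out) if device_type == "hpu" else out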
@@ -595,8 +605,7 @@ def tokenize(self, texts: Union[List[str], List[Dict], List[Tuple[str, str]]]):
         """
         kwargs = {}
         # HPU models reach optimal performance if the padding is not dynamic
-        if self.device.type == "hpu":
-            kwargs["padding"] = "max_length"
+        kwargs["padding"] = self.padding

         try:
             return self._first_module().tokenize(texts, **kwargs)
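Net effect of the SentenceTransformer.py changes: padding becomes a constructor argument instead of being hard-coded to "max_length" whenever the device is HPU, and adapt_transformers_to_gaudi() patches transformers for Gaudi before inference. A usage sketch (assumes this diff is applied, plus optimum-habana and the Habana PyTorch bridge; the model name is only an example):

from sentence_transformers import SentenceTransformer

# Static padding keeps input shapes constant across batches, which avoids
# HPU-graph recompilation; pass padding=True for dynamic padding instead.
model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device="hpu",
    padding="max_length",
)

embeddings = model.encode(["An example sentence", "Another one"])
print(embeddings.shape)  # (2, embedding_dim)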
8 changes: 6 additions & 2 deletions sentence_transformers/models/CLIPModel.py
@@ -3,7 +3,7 @@
 import transformers
 import torch
 from PIL import Image
-
+from sentence_transformers.util import get_device_name

 class CLIPModel(nn.Module):
     def __init__(self, model_name: str = "openai/clip-vit-base-patch32", processor_name=None):
@@ -72,7 +72,11 @@ def tokenize(self, texts, padding: Union[str, bool] = True):
             encoding["pixel_values"] = image_features.pixel_values

         encoding["image_text_info"] = image_text_info
-        return encoding
+        device = get_device_name()
+        if device == "hpu":
+            return dict(encoding)
+        else:
+            return encoding

     def save(self, output_path: str):
         self.model.save_pretrained(output_path)
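On HPU, CLIPModel.tokenize now returns a plain dict instead of the tokenizer's BatchEncoding, presumably so the features survive the deepcopy-based handling of HPU-graph outputs added above; the diff itself does not state the reason. The conversion is just a subclass downcast (illustration only; the tokenizer call stands in for what CLIPModel wraps):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
encoding = tokenizer(["a photo of a cat"], return_tensors="pt")

# BatchEncoding is a dict subclass; dict(encoding) keeps the same tensors
# but drops the subclass wrapper.
plain = dict(encoding)
print(type(encoding).__name__, "->", type(plain).__name__)  # BatchEncoding -> dict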
10 changes: 2 additions & 8 deletions tests/test_compute_embeddings.py
@@ -5,7 +5,6 @@
 import numpy as np

 from sentence_transformers import SentenceTransformer
-from sentence_transformers.util import get_device_name


 def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
@@ -24,13 +23,8 @@ def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
     emb = model.encode(sent, output_value="token_embeddings", batch_size=2)
     assert len(emb) == len(sent)

-    device = get_device_name()
-    if device == "hpu":
-        for s, e in zip(sent, emb):
-            assert len(model.tokenize([s])["input_ids"][0]) == model.get_max_seq_length()
-    else:
-        for s, e in zip(sent, emb):
-            assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]
+    for s, e in zip(sent, emb):
+        assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]


 def test_encode_single_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
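Because padding now defaults to dynamic (True) on every device rather than being forced to "max_length" on HPU, the device-specific branch in this test disappears: all devices share the invariant that each sentence's tokenized length equals its token-embedding count. A standalone version of that check (downloads the model; names mirror the test file):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-distilroberta-base-v1")
sent = ["Hello world", "A slightly longer example sentence"]
emb = model.encode(sent, output_value="token_embeddings", batch_size=2)

for s, e in zip(sent, emb):
    # With dynamic padding, the tokenized length of each sentence equals the
    # number of token embeddings returned for it.
    assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]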