Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Nikita Savelyev <nikita.savelyev@intel.com>
  • Loading branch information
eaidova and nikita-savelyevv authored Dec 6, 2024
1 parent 7af7cdc commit 163dadd
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions optimum/intel/openvino/modeling_visual_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ def prepare_inputs(
position_ids = position_ids[:, -inputs_embeds.shape[1] :]

if self.config.model_type == "qwen2_vl" and position_ids.ndim != 3:
- position_ids = np.expand_dims(position_ids, 0)
- position_ids = np.concatenate([position_ids, position_ids, position_ids], axis=0)
+ position_ids = np.repeat(np.expand_dims(position_ids, 0), 3, axis=0)

inputs["position_ids"] = position_ids

Expand Down Expand Up @@ -2132,7 +2131,6 @@ def get_rope_index(
for i, input_ids in enumerate(total_input_ids):
if attention_mask is not None:
input_ids = input_ids[attention_mask[i] == 1]
- image_nums, video_nums = 0, 0
vision_start_indices = torch.argwhere(input_ids == vision_start_token_id).squeeze(1)
vision_tokens = input_ids[vision_start_indices + 1]
image_nums = (vision_tokens == image_token_id).sum()
Expand Down

0 comments on commit 163dadd

Please sign in to comment.