-
Notifications
You must be signed in to change notification settings - Fork 174
/
Copy pathbatch_infer.py
74 lines (64 loc) · 2.16 KB
/
batch_infer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import paddle
from paddlemix.models.qwen2_vl import MIXQwen2Tokenizer
from paddlemix.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLForConditionalGeneration
from paddlemix.processors.qwen2_vl_processing import (
Qwen2VLImageProcessor,
Qwen2VLProcessor,
process_vision_info,
)
# Vision-token budget for the image processor: each image is resized so its
# token count falls within [min_pixels, max_pixels] (28x28 pixels per token).
min_pixels = 256 * 28 * 28
max_pixels = 1280 * 28 * 28

# Load the Qwen2-VL model and build its processing pipeline.
model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", dtype="bfloat16")
image_processor = Qwen2VLImageProcessor()
tokenizer = MIXQwen2Tokenizer.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
processor = Qwen2VLProcessor(image_processor, tokenizer, min_pixels=min_pixels, max_pixels=max_pixels)

# Sample messages for batch inference.
# Conversation 1: two images plus a comparison question.
messages1 = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "paddlemix/demo_images/examples_image1.jpg"},
            {"type": "image", "image": "paddlemix/demo_images/examples_image2.jpg"},
            {"type": "text", "text": "What are the common elements in these pictures?"},
        ],
    }
]
# Conversation 2: text-only example. NOTE: intentionally NOT included in the
# batch below — kept here only as a reference for the plain-text message format.
messages2 = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who are you?"},
]
# Conversation 3: single image with an open-ended description prompt.
messages3 = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": "paddlemix/demo_images/twitter3.jpeg",
            },
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

# Combine messages for batch processing.
messages = [messages1, messages3]

# Preparation for batch inference: render each conversation into a prompt
# string with the chat template, then collect the referenced images/videos.
texts = [
    processor.tokenizer.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
    for msg in messages
]
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=texts,
    images=image_inputs,
    videos=video_inputs,
    padding=True,  # pad prompts to a common length so they batch together
    return_tensors="pd",
)

# Batch inference.
generated_ids = model.generate(**inputs, max_new_tokens=128)
# Paddle's generate() returns a tuple; element 0 holds the generated token
# ids — TODO(review): confirm against the installed paddlenlp version.
output_text = processor.batch_decode(
    generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False
)
for i, text in enumerate(output_text):
    print(f"Generated output for message {i}: {text}")