-
Notifications
You must be signed in to change notification settings - Fork 74
/
Copy pathbatch_inference.py
69 lines (60 loc) · 2.36 KB
/
batch_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
# Silence library log output, progress bars, and Python warnings so that the
# script's only visible output is the final printed answers.
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings('ignore')

# Select the device: use CUDA when it is available, otherwise fall back to
# CPU so the script can still run on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.set_default_device(device)

# float16 on GPU, float32 on CPU.
model_dtype = torch.float16 if device == 'cuda' else torch.float32

model_name = 'BAAI/Bunny-v1_1-Llama-3-8B-V'  # or 'BAAI/Bunny-Llama-3-8B-V' or 'BAAI/Bunny-v1_1-4B' or 'BAAI/Bunny-v1_0-4B' or 'BAAI/Bunny-v1_0-3B' or 'BAAI/Bunny-v1_0-3B-zh' or 'BAAI/Bunny-v1_0-2B-zh'

# Create the model and its tokenizer from the same checkpoint name.
# NOTE(review): trust_remote_code=True presumably pulls model-specific code
# from the checkpoint repository -- confirm you trust that source.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map='auto',
    trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True)
# for batch inference
# Pad on the left so every row of the batch ends with its prompt's last token.
tokenizer.padding_side = "left"
tokenizer.pad_token_id = model.generation_config.pad_token_id
padding_max_length = 128  # customize for your circumstance

# Register '<image>' as a single token so its id can be located after encoding.
tokenizer.add_tokens(['<image>'])
image_token_id = tokenizer.convert_tokens_to_ids('<image>')

# Value written in place of the '<image>' token id before generation.
# NOTE(review): presumably the image-placeholder index expected by the
# model's own code -- confirm against the checkpoint's implementation.
IMAGE_TOKEN_INDEX = -200

# text prompts
prompts = [
    'What is the astronaut holding in his hand?',
    'Why is the image funny?',
    'What is the occupation of the person in the picture?',
    'What animal is in the picture?'
]

# Wrap each question in the chat template, with one '<image>' placeholder.
texts = [
    f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{prompt} ASSISTANT:"
    for prompt in prompts]

# Encode every prompt separately, padding each one up to padding_max_length.
encoded_prompts = [
    tokenizer(text, padding='max_length', max_length=padding_max_length).input_ids
    for text in texts]

# No truncation is requested, so a prompt longer than padding_max_length would
# make the rows ragged and torch.tensor would fail with an unhelpful message.
# Fail early and say which prompts are too long.
overlong_prompts = [
    index for index, token_ids in enumerate(encoded_prompts)
    if len(token_ids) > padding_max_length]
if overlong_prompts:
    raise ValueError(
        f"Prompts at indices {overlong_prompts} encode to more than "
        f"padding_max_length={padding_max_length} tokens; "
        f"increase padding_max_length.")

input_ids = torch.tensor(encoded_prompts, dtype=torch.long).to(device)
# Swap the '<image>' token id for the placeholder index, in place.
input_ids[input_ids == image_token_id] = IMAGE_TOKEN_INDEX
# images, sample images can be found in https://huggingface.co/BAAI/Bunny-v1_1-Llama-3-8B-V/tree/main/images
# One path per prompt, in the same order as the prompts.
image_paths = [
    'example_1.png',
    'example_2.png',
    'example_1.png',
    'example_2.png',
]

# Open every image, then let the model's own helper turn them into a tensor.
images = list(map(Image.open, image_paths))
image_tensor = model.process_images(images, model.config)

# Match the model's dtype and place the tensor on the selected device.
image_tensor = image_tensor.to(dtype=model.dtype, device=device)
# generate
generation_kwargs = dict(
    images=image_tensor,
    max_new_tokens=100,
    use_cache=True,
    repetition_penalty=1.0,  # increase this to avoid chattering
)
output_ids = model.generate(input_ids, **generation_kwargs)

# Skip the first input_ids.shape[1] columns of every output row before
# decoding (presumably the echoed prompt -- confirm against the model code).
prompt_length = input_ids.shape[1]
new_token_ids = output_ids[:, prompt_length:]
answers = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)

# Print all answers as one list, with surrounding whitespace removed.
print([answer.strip() for answer in answers])