-
Notifications
You must be signed in to change notification settings - Fork 1
/
QA_Pairs_Generation.py
171 lines (143 loc) · 9.97 KB
/
QA_Pairs_Generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import os
import json
import base64
from openai import OpenAI
import openai
from tqdm import tqdm
from argparse import ArgumentParser
# Shared OpenAI client, created once at import time and used for the
# chat-completion requests issued by read_files_from_subfolder.
# NOTE(review): presumably picks up credentials from the environment
# (e.g. OPENAI_API_KEY) since no arguments are passed — confirm.
client= OpenAI()
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file contents, decoded to a ``str``
        using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def _build_prompt_messages(search_query, caption, base64_image, sub_landesc):
    """Build the chat messages sent to the model for one image.

    The user message carries the search query and caption as text plus the
    image as a base64 data URL; the system message carries the generation
    rules and the expected JSON output format.
    """
    system_prompt = (
        f"""Given an image and its caption, generate two short questions and answers, two multiple-choice questions and answers, one true/false question, and one long question and answer. Refer to the caption for the context/hint. Take into account the cultural diversity of {sub_landesc}"""
        + """
Follow the following rules while designing questions and answers:
1. The question must be answerable only by looking at the image.
2. Ensure that the questions are culturally relevant and specific to the image.
3. Provide answers that are concise, accurate, and directly related to the question.
4. You will also need to provide 1 correct option and 3 other incorrect options (distractors).
For the distractors, choose options that are relevant, not obvious wrong answers.
5. The question must be answerable even without the multiple-choice.
Example of the invalid question: (“What song is not performed by this musician” –
not answerable if you don’t know the choices).
6. Make sure the questions are written fluently in English.
7. Be mindful of cultural sensitivities and avoid stereotyping or misrepresenting cultural aspects.
8. Ensure there are variations in your questions. Identity questions are fine, eg “What is this”,
or “where is this”. But additionally adding more complex/difficult questions would be great.
For example, multi-hop reasoning, counting, referencing, or questions that require local commonsense knowledge to be answered.
9. Just generate these in English.
10. For short questions and answers, don't keep it very short, include at least 2 sentences.
11. Make the questions distinct and unique from each other.
Give the answers in the following JSON format and make sure to only output a valid JSON,
{
"short_questions": [
{
"question": "What is the name of this building?",
"answer": "Eiffel Tower"
},
{
"question": "What is the name of this building?",
"answer": "Eiffel Tower"
}
],
"multiple_choice_questions": [
{
"question": "What is the name of this building?",
"answer": "Eiffel Tower",
"options": ["Eiffel Tower", "Empire State Building", "Burj Khalifa", "Petronas Towers"]
},
{
"question": "What is the name of this building?",
"answer": "Eiffel Tower",
"options": ["Eiffel Tower", "Empire State Building", "Burj Khalifa", "Petronas Towers"]
}
],
"true_false_question":
{
"question": "Does this image depict/symbolize some history",
"answer": "True, it does"
},
"long_question": {
"question": "What is the name of this building? Describe why it was originally built, the initial public reception, and how it became a global cultural icon over time.",
"answer": "The Eiffel Tower, constructed between 1887 and 1889, was originally built as the entrance arch to the 1889 World’s Fair, held in Paris to commemorate the 100th anniversary of the French Revolution. Designed by engineer Gustave Eiffel, the tower was initially met with mixed reactions. Many Parisians, including prominent artists and intellectuals, criticized its modern design, labeling it as an eyesore. However, despite the criticism, the Eiffel Tower quickly gained recognition as a groundbreaking achievement in engineering and design. Standing at 300 meters tall (about 984 feet), it was the tallest man-made structure in the world at the time, made possible by the innovative use of wrought iron."
}
}
"""
    )
    # NOTE: the system message is kept after the user message, as in the
    # original request layout.
    return [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": search_query + "\n" + caption
                },
                {
                    "type": "image_url",
                    # NOTE(review): the data URL always declares image/jpeg,
                    # whatever the extension of the image file is.
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            ]
        },
        {
            "role": "system",
            "content": system_prompt
        }
    ]


def read_files_from_subfolder(subfolder_path, language, save_path):
    """Generate QA pairs for every captioned image under *subfolder_path*.

    Walks *subfolder_path* recursively. Each ``.json`` file is expected to
    hold the keys ``caption``, ``url``, ``path``, ``file_name`` and ``query``
    (matched case-insensitively) and to sit next to the image named by
    ``file_name``. For every usable record one chat-completion request is
    sent through the module-level ``client`` and the raw response text is
    collected. The collected records are written to
    ``{save_path}/{language}.json``.

    Records that cannot be processed (unreadable JSON, missing fields, image
    name not matching the JSON name, unreadable image, API error) are
    reported on stdout and skipped so that one bad file does not discard the
    results gathered so far.

    Args:
        subfolder_path: Directory to scan for JSON/image pairs.
        language: Name used for the output file (``<language>.json``).
        save_path: Directory that receives the output file; it is created
            if it does not exist.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    dl = []
    invalid_image_count = 0
    for root, _dirs, files in os.walk(subfolder_path):
        for file in files:
            file_path = os.path.join(root, file)
            if not file_path.endswith(".json"):
                continue

            new_file_path = file_path.replace("Caption", "caption")
            try:
                # JSON text is UTF-8 by specification; do not rely on the
                # platform default encoding.
                with open(new_file_path, 'r', encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"Could not read JSON file {new_file_path}: {e}")
                continue
            if not isinstance(data, dict):
                print(f"Unexpected JSON structure in file: {file_path}")
                continue

            data = {key.lower(): value for key, value in data.items()}
            caption = data.get('caption', None)
            image_url = data.get('url', None)  # source URL of the image, stored with the results
            landesc = data.get('path', None)
            image_file_name = data.get('file_name', None)
            search_query = data.get('query', None)
            required_fields = (caption, image_url, landesc, image_file_name, search_query)
            if any(field is None for field in required_fields):
                print(f"Missing data in JSON file: {file_path}")
                # Nothing below can work without these fields.
                continue
            if not caption:
                # An empty caption gives the model no context; skip the record.
                continue

            # Last component of the Windows-style path stored in the record.
            sub_landesc = landesc.split("\\")[-1]
            json_file_name = os.path.basename(file_path).split(".")[0]
            image_file_name_without_extension = image_file_name.split(".")[0]
            if image_file_name_without_extension != json_file_name:
                # Explicit check instead of `assert`, which is removed under -O.
                print(
                    f"Error processing image {image_url}: "
                    f"Image file name {image_file_name} does not match JSON file name {json_file_name}"
                )
                continue

            folder_path = os.path.dirname(file_path)
            image_path = os.path.join(folder_path, image_file_name)
            # Best-effort boundary: any failure for one image (missing file,
            # API error) is reported and the run moves on to the next record.
            try:
                base64_image = encode_image(image_path)
                response = client.chat.completions.create(
                    model="gpt-4o",
                    messages=_build_prompt_messages(
                        search_query, caption, base64_image, sub_landesc
                    )
                )
                t_data = {
                    "image_url": image_url,
                    "questions": response.choices[0].message.content,
                    "langdesc": landesc,
                    "caption": caption
                }
                dl.append(t_data)
            except Exception as e:
                if "invalid_image" in str(e):
                    invalid_image_count += 1
                    print(f"Invalid image: {image_url}")
                else:
                    print(f"Error processing image {image_url}: {e}")

    if save_path:
        # Make sure the results are not lost because the directory is missing.
        os.makedirs(save_path, exist_ok=True)
    with open(f"{save_path}/{language}.json", 'w') as f:
        json.dump(dl, f, indent=2, default=str)
    # Only errors reported as "invalid_image" are counted here.
    print(f"Number of images with errors: {invalid_image_count}")
def main():
    """Run QA-pair generation for every language subfolder.

    Command-line arguments:
        --folder_path: Folder whose immediate subdirectories each hold the
            images and JSON files of one language.
        --save_path: Folder that receives one ``<language>.json`` per
            subdirectory.

    Both arguments are required so that a missing value is rejected before
    any work is done.
    """
    parser = ArgumentParser()
    parser.add_argument("--folder_path", type=str, required=True, help="Path to the folder containing the subfolders with images and JSON files")
    parser.add_argument("--save_path", type=str, required=True, help="Path to save the output JSON files")
    args = parser.parse_args()

    # Only directories are language subfolders; stray files in folder_path
    # would otherwise yield empty output files named after them.
    languages = sorted(
        entry for entry in os.listdir(args.folder_path)
        if os.path.isdir(os.path.join(args.folder_path, entry))
    )
    for language in tqdm(languages):
        subfolder_path = os.path.join(args.folder_path, language)
        print(f'language {language}')
        read_files_from_subfolder(subfolder_path, language, args.save_path)


# Run the generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()