-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathimage_data_processing.py
182 lines (152 loc) · 7.75 KB
/
image_data_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import re
import os
import time
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from rich.progress import Progress, TextColumn, BarColumn, TimeElapsedColumn
from data_processing_common import sanitize_filename # Import sanitize_filename
def get_text_from_generator(generator):
    """Concatenate the streamed text fragments of a chat response.

    Args:
        generator: Iterable (generator, iterator or sequence) of response
            chunks. Each chunk is read as a mapping with an optional
            'choices' list; each choice may carry a 'delta' mapping with an
            optional 'content' string.

    Returns:
        str: All 'content' fragments joined in arrival order, or '' when the
        stream yields no text.
    """
    fragments = []
    for response in generator:
        # `or` fallbacks tolerate chunks where the key exists but holds None.
        for choice in response.get('choices') or []:
            delta = choice.get('delta') or {}
            content = delta.get('content')
            # Role-only and final chunks can carry content=None (or none at
            # all); appending those would break the join below.
            if content:
                fragments.append(content)
    return ''.join(fragments)
def process_single_image(image_path, image_inference, text_inference, silent=False, log_file=None):
    """Process a single image file to generate metadata.

    Runs generate_image_metadata under a per-file progress bar, then reports
    the outcome: printed when `silent` is false, appended to `log_file` when
    `silent` is true and a log file is given, and dropped otherwise.

    Args:
        image_path: Path of the image to process.
        image_inference: Passed through to generate_image_metadata.
        text_inference: Passed through to generate_image_metadata.
        silent: When true, nothing is printed to stdout.
        log_file: Path of a file the report is appended to in silent mode.

    Returns:
        dict: Keys 'file_path', 'foldername', 'filename' and 'description'.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long inference.
    start_time = time.perf_counter()

    # Create a Progress instance for this file
    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TimeElapsedColumn()
    ) as progress:
        task_id = progress.add_task(f"Processing {os.path.basename(image_path)}", total=1.0)
        foldername, filename, description = generate_image_metadata(
            image_path, progress, task_id, image_inference, text_inference
        )

    time_taken = time.perf_counter() - start_time

    message = f"File: {image_path}\nTime taken: {time_taken:.2f} seconds\nDescription: {description}\nFolder name: {foldername}\nGenerated filename: {filename}\n"
    if not silent:
        print(message)
    elif log_file:
        # The description is model output and may contain non-ASCII text;
        # pin the encoding so logging does not depend on the locale default.
        with open(log_file, 'a', encoding='utf-8') as f:
            f.write(message + '\n')

    return {
        'file_path': image_path,
        'foldername': foldername,
        'filename': filename,
        'description': description
    }
def process_image_files(image_paths, image_inference, text_inference, silent=False, log_file=None):
    """Run process_single_image over each path in order and collect the results.

    Returns:
        list: One result per entry of `image_paths`, in the same order.
    """
    return [
        process_single_image(
            path,
            image_inference,
            text_inference,
            silent=silent,
            log_file=log_file,
        )
        for path in image_paths
    ]
# Words dropped from generated names in addition to the NLTK English
# stopwords: prompt echoes, file-type terms, filler words and verbs.
_IMAGE_NAME_UNWANTED_WORDS = frozenset({
    'the', 'and', 'based', 'generated', 'this', 'is', 'filename', 'file', 'image', 'picture', 'photo',
    'folder', 'category', 'output', 'only', 'below', 'text', 'jpg', 'png', 'jpeg', 'gif', 'bmp', 'svg',
    'logo', 'in', 'on', 'of', 'with', 'by', 'for', 'to', 'from', 'a', 'an', 'as', 'at', 'red', 'blue',
    'green', 'color', 'colors', 'colored', 'graphic', 'graphics', 'main', 'subject', 'important',
    'details', 'description', 'depicts', 'show', 'shows', 'display', 'illustrates', 'presents', 'features',
    'provides', 'covers', 'includes', 'demonstrates', 'describes',
})


def _clean_image_ai_output(text, max_words, unwanted_words, lemmatizer):
    """Reduce raw model output to at most `max_words` underscore-joined keywords.

    Args:
        text: Raw text to clean (a model reply or the image description).
        max_words: Maximum number of keywords to keep.
        unwanted_words: Lower-case words to discard.
        lemmatizer: Object with a `lemmatize(word)` method.

    Returns:
        str: Lower-case keywords joined by '_', or '' if nothing survives.
    """
    text = re.sub(r'\.\w{1,4}$', '', text)  # Remove file extensions like .jpg, .png
    # `\w` matches '_', so underscores are listed explicitly. Otherwise a
    # reply in the requested 'sunset_over_mountains' form stays one token,
    # fails isalpha() below and is discarded entirely.
    text = re.sub(r'[^\w\s]|_', ' ', text)
    text = re.sub(r'\d+', '', text)  # Remove digits
    text = text.strip()
    # Split concatenated words (e.g., 'GoogleChrome' -> 'Google Chrome')
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)

    # Tokenize, keep purely alphabetic tokens, lower-case and lemmatize them
    words = [
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token.isalpha()
    ]

    # dict.fromkeys removes duplicates while keeping first-seen order
    unique_words = dict.fromkeys(word for word in words if word not in unwanted_words)
    return '_'.join(list(unique_words)[:max_words])


def generate_image_metadata(image_path, progress, task_id, image_inference, text_inference):
    """Generate description, folder name, and filename for an image file.

    Args:
        image_path: Path of the image; passed to the image model and used to
            build the last-resort filename.
        progress: Object whose `update(task_id, advance=...)` is called after
            each of the three steps, advancing by 1/3 each time.
        task_id: Task identifier forwarded to `progress.update`.
        image_inference: Object whose `_chat(prompt, image_path)` returns the
            streamed response read by get_text_from_generator.
        text_inference: Object whose `create_completion(prompt)` returns a
            mapping whose text is read from ['choices'][0]['text'].

    Returns:
        tuple: (sanitized folder name, sanitized filename, description).
    """
    # Total steps in processing an image
    total_steps = 3

    # Step 1: Generate description using image_inference
    description_prompt = "Please provide a detailed description of this image, focusing on the main subject and any important details."
    description_generator = image_inference._chat(description_prompt, image_path)
    description = get_text_from_generator(description_generator).strip()
    progress.update(task_id, advance=1 / total_steps)

    # Step 2: Generate filename using text_inference
    filename_prompt = f"""Based on the description below, generate a specific and descriptive filename for the image.
Limit the filename to a maximum of 3 words. Use nouns and avoid starting with verbs like 'depicts', 'shows', 'presents', etc.
Do not include any data type words like 'image', 'jpg', 'png', etc. Use only letters and connect words with underscores.
Description: {description}
Example:
Description: A photo of a sunset over the mountains.
Filename: sunset_over_mountains
Now generate the filename.
Output only the filename, without any additional text.
Filename:"""
    filename_response = text_inference.create_completion(filename_prompt)
    filename = filename_response['choices'][0]['text'].strip()
    # Remove 'Filename:' prefix if present
    filename = re.sub(r'^Filename:\s*', '', filename, flags=re.IGNORECASE).strip()
    progress.update(task_id, advance=1 / total_steps)

    # Step 3: Generate folder name from description using text_inference
    foldername_prompt = f"""Based on the description below, generate a general category or theme that best represents the main subject of this image.
This will be used as the folder name. Limit the category to a maximum of 2 words. Use nouns and avoid verbs.
Do not include specific details, words from the filename, or any generic terms like 'untitled' or 'unknown'.
Description: {description}
Examples:
1. Description: A photo of a sunset over the mountains.
Category: landscapes
2. Description: An image of a smartphone displaying a storage app with various icons and information.
Category: technology
3. Description: A close-up of a blooming red rose with dew drops.
Category: nature
Now generate the category.
Output only the category, without any additional text.
Category:"""
    foldername_response = text_inference.create_completion(foldername_prompt)
    foldername = foldername_response['choices'][0]['text'].strip()
    # Remove 'Category:' prefix if present
    foldername = re.sub(r'^Category:\s*', '', foldername, flags=re.IGNORECASE).strip()
    progress.update(task_id, advance=1 / total_steps)

    # The stopword list is loaded here rather than at import time so that
    # importing this module does not require the NLTK corpora.
    all_unwanted_words = _IMAGE_NAME_UNWANTED_WORDS.union(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Process filename: model reply, then description keywords, then the
    # original file's base name as a last resort.
    filename = _clean_image_ai_output(filename, 3, all_unwanted_words, lemmatizer)
    if not filename or filename.lower() == 'untitled':
        filename = _clean_image_ai_output(description, 3, all_unwanted_words, lemmatizer)
        if not filename:
            filename = 'image_' + os.path.splitext(os.path.basename(image_path))[0]
    sanitized_filename = sanitize_filename(filename, max_words=3)

    # Process foldername: model reply, then description keywords, then a
    # fixed default.
    foldername = _clean_image_ai_output(foldername, 2, all_unwanted_words, lemmatizer)
    if not foldername or foldername.lower() == 'untitled':
        foldername = _clean_image_ai_output(description, 2, all_unwanted_words, lemmatizer)
        if not foldername:
            foldername = 'images'
    sanitized_foldername = sanitize_filename(foldername, max_words=2)

    return sanitized_foldername, sanitized_filename, description