Augment.py
import albumentations as A
from albumentations.core.keypoints_utils import KeypointParams
from albumentations.augmentations.dropout import coarse_dropout
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
from Config import Config

"""
This file creates an augmentation pipeline for images.
"""


def quad_channel_2_single_channel(img_path):
    """
    Transform the original image without background (R, G, B, Alpha) into a
    single-channel gray-scale image.
    """
    # Replace the suffix with .png and read the image with 4 channels (RGBA)
    base, _ = os.path.splitext(img_path)
    img_path = f"{base}.png"
    image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    # Check that the image actually has 4 channels
    if image is not None and image.ndim == 3 and image.shape[2] == 4:
        # Split the image into colour and alpha channels
        bgr = image[:, :, :3]  # The colour channels (BGR in OpenCV)
        alpha = image[:, :, 3]  # The alpha channel
        # Convert the colour channels to grayscale (standard luminance formula)
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # The alpha channel is 0 for the background, so use it as a mask to zero
        # out background pixels (a plain uint8 multiplication would overflow).
        without_bg = cv2.bitwise_and(gray, gray, mask=alpha)
        return without_bg
    else:
        print("The image does not have 4 channels.")
        return None


def create_transform_pipeline(augment, model_input_shape):
    if augment:
        return A.Compose(
            [
                # Step 1: Pad the image with zeros to ensure the object stays visible
                A.PadIfNeeded(
                    min_height=1200,  # Minimum height after padding
                    min_width=1600,  # Minimum width after padding
                    border_mode=cv2.BORDER_CONSTANT,
                    value=0,  # Padding value (0 for black)
                ),
                # A.Rotate(limit=50, p=1.0, border_mode=cv2.BORDER_CONSTANT),  # Rotate the image
                # A.HorizontalFlip(p=0.5),  # Most images share the same orientation, so keeping it fixed makes the task easier for the model
                A.ShiftScaleRotate(
                    shift_limit=0.15,
                    scale_limit=0.15,
                    rotate_limit=30,
                    p=1.0,
                    border_mode=cv2.BORDER_CONSTANT,
                ),  # Combined shift/scale/rotate
                A.CoarseDropout(
                    num_holes_range=(5, 12),
                    hole_height_range=(25, 60),
                    hole_width_range=(25, 60),
                    p=1.0,
                ),  # Randomly mask out patches
                A.GaussNoise(var_limit=(8.0, 40.0), p=0.4),  # Add Gaussian noise
                A.RandomBrightnessContrast(
                    brightness_limit=0.1, contrast_limit=0.1, p=0.5
                ),  # Adjust brightness/contrast
                A.Blur(blur_limit=3, p=0.3),  # Apply random blur
                # A.ElasticTransform(alpha=1, sigma=50, p=0.2, border_mode=cv2.BORDER_CONSTANT),  # Elastic deformation
                A.Resize(
                    height=model_input_shape[0], width=model_input_shape[1], p=1.0
                ),  # Resize image to fixed shape
            ],
            keypoint_params=A.KeypointParams(format="xy"),  # Keypoints format is (x, y)
        )
    else:
        # For inference, we are just interested in resizing the image to the model input shape.
        return A.Compose(
            [
                A.Resize(
                    height=model_input_shape[0], width=model_input_shape[1], p=1.0
                ),  # Resize image to fixed shape
            ],
            keypoint_params=A.KeypointParams(format="xy"),  # Keypoints format is (x, y)
        )
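

# Usage sketch (not part of the original file): building both pipelines. The
# (256, 256) input shape is an assumed placeholder; the real value would come
# from the Config class.
#   train_pipeline = create_transform_pipeline(augment=True, model_input_shape=(256, 256))
#   infer_pipeline = create_transform_pipeline(augment=False, model_input_shape=(256, 256))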


def reverse_infer_keypoints(keypoints, original_shape, target_shape):
    """
    Reverse the resizing operation on keypoints.

    Args:
        keypoints (list of tuples): List of keypoints in (x, y) format.
        original_shape (tuple): Original image shape (height, width).
        target_shape (tuple): Target image shape (height, width).

    Returns:
        list of tuples: Keypoints transformed back to the original image dimensions.
    """
    orig_height, orig_width = original_shape
    target_height, target_width = target_shape
    scale_factor_x = orig_width / target_width
    scale_factor_y = orig_height / target_height
    # Scale keypoints back to the original image dimensions
    reversed_keypoints = [
        (kp[0] * scale_factor_x, kp[1] * scale_factor_y) for kp in keypoints
    ]
    return reversed_keypoints
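

# Worked example with assumed numbers (not from the original file): an image of
# original shape (1200, 1600) resized to (256, 256). A keypoint predicted at
# (128, 128) in the resized image maps back to
#   x = 128 * (1600 / 256) = 800,  y = 128 * (1200 / 256) = 600
# i.e. reverse_infer_keypoints([(128, 128)], (1200, 1600), (256, 256)) -> [(800.0, 600.0)]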


# Get augmented image and keypoints
def prepare_for_model(img_path, pipeline, keypoints=None):
    if keypoints is None:
        keypoints = []
    single_channel_image = quad_channel_2_single_channel(img_path)
    transformed = pipeline(image=single_channel_image, keypoints=keypoints)
    return (
        transformed["image"],
        transformed["keypoints"],
    )  # if not augment_images else (augmented["image"], [])
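

if __name__ == "__main__":
    # Minimal end-to-end sketch, not part of the original pipeline. The image path
    # and model input shape below are assumed placeholders; in the real project
    # they would come from the Config class.
    example_path = "data/example_image.png"  # hypothetical path
    input_shape = (256, 256)  # hypothetical (height, width) expected by the model
    pipeline = create_transform_pipeline(augment=True, model_input_shape=input_shape)
    image, keypoints = prepare_for_model(example_path, pipeline, keypoints=[(100, 200)])
    print("Augmented image shape:", image.shape)
    print("Transformed keypoints:", keypoints)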