transforms.py
import typing

import torch
import torchvision.datapoints
import torchvision.transforms.v2 as T
import torchvision.transforms.v2.functional as F


# https://github.com/pytorch/vision/issues/6236#issuecomment-1175971587
class Resize_with_pad:
    """Resize to a fixed (w, h) while preserving aspect ratio by padding first."""

    def __init__(self, w=256, h=256, interpolation=T.InterpolationMode.BILINEAR, antialias=True):
        self.w = w
        self.h = h
        self.interpolation = interpolation
        self.antialias = antialias

    def __call__(self, sample):
        if isinstance(sample, typing.Sequence):
            # apply to all elements if the sample is a sequence (e.g. image, mask)
            return tuple(self.do(i) for i in sample)
        else:
            return self.do(sample)

    def do(self, image):
        # tensors are CHW, PIL images report (width, height)
        if isinstance(image, torch.Tensor):
            h_1, w_1 = image.shape[-2:]
        else:
            w_1, h_1 = image.size

        ratio_f = self.w / self.h
        ratio_1 = w_1 / h_1

        # check if the original and final aspect ratios are the same within a margin
        if round(ratio_1, 2) != round(ratio_f, 2):
            # padding to preserve aspect ratio
            hp = int(w_1 / ratio_f - h_1)
            wp = int(ratio_f * h_1 - w_1)
            if hp > 0 and wp < 0:
                # image is too wide: pad top and bottom
                hp = hp // 2
                image = F.pad(image, (0, hp, 0, hp), 0, "constant")
            elif hp < 0 and wp > 0:
                # image is too tall: pad left and right
                wp = wp // 2
                image = F.pad(image, (wp, 0, wp, 0), 0, "constant")

        return F.resize(image, [self.h, self.w], interpolation=self.interpolation, antialias=self.antialias)
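
# Usage sketch (illustrative only, not referenced elsewhere in this module): a
# non-square CHW tensor is padded to the target aspect ratio and then resized
# to 256x256; the 3x200x100 input shape below is an arbitrary example.
def _demo_resize_with_pad():
    rwp = Resize_with_pad(w=256, h=256)
    image = torch.zeros(3, 200, 100)   # hypothetical test image, taller than wide
    out = rwp(image)
    assert out.shape[-2:] == (256, 256)
    return out
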
# Highest class index that still describes an object and not background/void
PASCAL_VOC_OBJECT_CLASS_MAX = 20


class ClipMaskClasses:
    """Map mask values above max_class (e.g. the 255 'void' label) to background (0)."""

    def __init__(self, max_class: int) -> None:
        self.max_class = max_class

    def __call__(self, sample):
        if isinstance(sample, typing.Sequence):
            # apply to all elements if the sample is a sequence
            return tuple(self.__call__(i) for i in sample)
        elif isinstance(sample, torchvision.datapoints.Mask):
            return self.do(sample)
        else:
            return sample

    def do(self, x):
        # keep class ids up to max_class, map everything else to background (0)
        return torchvision.datapoints.Mask(torch.where(x <= self.max_class, x, 0))
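
# Usage sketch (illustrative only): values above max_class, such as the 255
# "void" border label in PASCAL VOC segmentation masks, are mapped to the
# background class 0, while valid object ids pass through unchanged.
def _demo_clip_mask_classes():
    clip = ClipMaskClasses(PASCAL_VOC_OBJECT_CLASS_MAX)
    mask = torchvision.datapoints.Mask(torch.tensor([[0, 12, 255]]))
    clipped = clip(mask)
    assert clipped.tolist() == [[0, 12, 0]]
    return clipped
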
def make_transforms(mean, std, augment_level=0):
    # apply anti-aliasing for resize operations; this is skipped automagically
    # for masks of type torchvision.datapoints.Mask
    antialias = True
    oplist = []
    oplist.append(T.ToImageTensor())
    if not augment_level:
        oplist.append(T.Resize(size=256, antialias=antialias))
        oplist.append(T.CenterCrop(256))
    if augment_level >= 1:
        oplist.append(T.RandomResizedCrop(size=256, scale=(0.3, 1.0), ratio=(1, 1), antialias=antialias))
        oplist.append(T.RandomHorizontalFlip())
    if augment_level >= 2:
        oplist.append(T.ColorJitter(brightness=.3, hue=.2, contrast=.4, saturation=.3))
        # very mild gaussian blur, almost no effect. we leave it in as a good luck charm.
        # larger kernels or larger sigma degrade performance, even on the validation set.
        oplist.append(T.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)))
    assert augment_level <= 2, "Augmentation level " + str(augment_level) + "?? What is this, the future??"
    oplist.append(T.ConvertImageDtype(torch.float32))
    oplist.append(T.Normalize(mean=mean, std=std))
    oplist.append(ClipMaskClasses(PASCAL_VOC_OBJECT_CLASS_MAX))
    return T.Compose(oplist)
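
# Usage sketch (illustrative only): the ImageNet mean/std values below are an
# assumption for the example; this module does not prescribe any statistics.
def _demo_make_transforms():
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    train_tf = make_transforms(imagenet_mean, imagenet_std, augment_level=2)  # crops, flips, jitter, blur
    val_tf = make_transforms(imagenet_mean, imagenet_std, augment_level=0)    # deterministic resize + center crop
    return train_tf, val_tf
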
def inv_normalize(mean, std):
    # inverse of T.Normalize: Normalize computes (x - mean) / std, so applying
    # Normalize with mean' = -mean/std and std' = 1/std maps it back to x
    return T.Normalize(mean=[-m / s for m, s in zip(mean, std)],
                       std=[1 / s for s in std])
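
# Sanity-check sketch (illustrative only): inv_normalize undoes T.Normalize,
# since x -> (x - m) / s followed by y -> (y + m / s) * s equals x again.
def _demo_inv_normalize():
    mean, std = [0.5, 0.5, 0.5], [0.25, 0.25, 0.25]   # arbitrary example statistics
    x = torch.rand(3, 8, 8)
    y = T.Normalize(mean=mean, std=std)(x)
    x_back = inv_normalize(mean, std)(y)
    assert torch.allclose(x, x_back, atol=1e-5)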