models.py
import torch
import torchvision.models as models
import torch.nn as nn
import consts
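# NOTE: `consts` is not shown in this file.  It is assumed to define
# HIDDEN_REPRESENTATION_DIM, which sizes the projection head below; for the
# ResNet-50 base used here the pooled feature width is 2048, so that appears
# to be the value this module expects.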
class Encoder(nn.Module):
    """ResNet-50 base followed by a fully connected projection head."""

    def __init__(self, end_num_of_features, model_name='resnet50', pretrained=True):
        """Construct a MoCo encoder composed of a resnet50 base followed by a fully connected head.

        The fully connected head applies one linear transformation that keeps the same (hidden)
        dimensionality, followed by a linear projection down to `end_num_of_features` dimensions.
        Each of these linear layers is followed by a ReLU non-linearity.

        :param end_num_of_features: The dimensionality of the final layer of the sequential head
            placed on top of the resnet50 base model.
        :param model_name: Name of the base model; only 'resnet50' is supported.
        :param pretrained: Whether the loaded base model should be pre-trained on ImageNet.
        """
        super(Encoder, self).__init__()
        if model_name == 'resnet50':
            model = models.resnet50(pretrained=pretrained, progress=True)
        else:
            raise RuntimeError(f"{model_name} is an unsupported model for the MoCo encoder. "
                               f"Please consider using resnet50 instead.")
        self.final_num_of_features = end_num_of_features
        # Drop the final classification layer; keep everything up to the global average pooling.
        self.resnet50 = nn.Sequential(*(list(model.children())[:-1]))
        # MoCo v2 uses a two-layer MLP projection head instead of a single fully connected layer.
        self.fc1 = nn.Sequential(nn.Linear(consts.HIDDEN_REPRESENTATION_DIM, consts.HIDDEN_REPRESENTATION_DIM),
                                 nn.ReLU(),
                                 nn.Linear(consts.HIDDEN_REPRESENTATION_DIM, self.final_num_of_features))
        self.non_linear_func = nn.ReLU()

    def forward(self, x, device):
        """Perform a forward pass through the MoCo encoder with an input batch `x`.

        :param x: A batch of image tensors of shape [batch_size, 3, 224, 224].
        :param device: A torch.device instance representing the device on which the forward pass runs.
        :return: A batch of L2-normalized embeddings of shape [batch_size, final_num_of_features].
        """
        x = self.resnet50(x.to(device))
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.non_linear_func(x)
        # Normalize each embedding by its L2 norm so every row has unit length.
        l2_norm = torch.linalg.norm(x, dim=1)
        x = (x.T / l2_norm).T
        return x
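

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the class above is a
# MoCo encoder, and MoCo keeps two copies of it -- a query encoder trained by
# back-propagation and a key encoder updated as an exponential moving average
# of the query weights: theta_k <- m * theta_k + (1 - m) * theta_q.
# The helper below shows that update rule; its name and the default momentum
# of 0.999 are assumptions for illustration, not taken from this repository.
# ---------------------------------------------------------------------------
@torch.no_grad()
def momentum_update(query_encoder: Encoder, key_encoder: Encoder, m: float = 0.999):
    """Update `key_encoder` parameters as an EMA of `query_encoder` parameters."""
    for q_param, k_param in zip(query_encoder.parameters(), key_encoder.parameters()):
        k_param.data.mul_(m).add_(q_param.data, alpha=1.0 - m)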


if __name__ == '__main__':
    encoder_model = Encoder(128)
    print(encoder_model)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    encoder_model.to(device)
    x_demo = torch.rand((4, 3, 224, 224))
    y = encoder_model(x_demo, device)
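    # Added sanity check (illustrative, not in the original file): assuming
    # consts.HIDDEN_REPRESENTATION_DIM matches ResNet-50's 2048-dimensional
    # pooled features, `y` should have shape (4, 128), and the L2 normalization
    # in forward() should give each row unit norm.
    print(y.shape)                      # expected: torch.Size([4, 128])
    print(torch.linalg.norm(y, dim=1))  # expected: values close to 1.0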
print("Done")