-
Notifications
You must be signed in to change notification settings - Fork 4
/
test.py
177 lines (147 loc) · 5.89 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Modified by Sukmin Yun (sukmin.yun@kaist.ac.kr)
#
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""Train a video classification model."""
import pprint
import torch
import pickle
import os.path as osp
from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats
from torch.cuda.amp import autocast, GradScaler
from fvcore.common.file_io import PathManager
import research_platform.utils.losses as losses
import research_platform.utils.optimizer as optim
import research_platform.utils.loader as loader
import research_platform.utils.checkpoint as cu
import research_platform.utils.distributed as du
import research_platform.utils.logging as logging
import research_platform.utils.misc as misc
import research_platform.visualization.tensorboard_vis as tb
from research_platform.models import build_model
from research_platform.utils.meters import TestMeter
import numpy as np
logger = logging.get_logger(__name__)
@torch.no_grad()
def perform_test(test_loader, model, test_meter, cfg, shuffle=False, writer=None):
    """
    Perform multi-view testing on a pretrained video model.

    For classification:
        Perform multi-view testing that uniformly samples N clips from a video
        along its temporal axis. For each clip, it takes 3 crops to cover the
        spatial dimension, followed by averaging the softmax scores across all
        Nx3 views to form a video-level prediction. All video predictions are
        compared to ground-truth labels and the final testing performance is
        logged.
    For detection:
        Perform fully-convolutional testing on the full frames without crop.

    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        shuffle (bool): if True, randomly permute the temporal (frame) order of
            each clip before the forward pass. Assumes `inputs` is a single
            (B, T, C, H, W) tensor in that case — TODO confirm against loader.
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to write Tensorboard log.

    Returns:
        TestMeter: the meter with finalized video-level metrics.
    """
    # Enable eval mode; gradients are already disabled by @torch.no_grad().
    model.eval()
    test_meter.iter_tic()
    for cur_iter, (inputs, labels, meta) in enumerate(test_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device. `inputs` may be a
            # list of pathway tensors (e.g. SlowFast) or a single tensor.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
        if shuffle:
            # Randomly permute the temporal dimension of the clip.
            B, T, C, H, W = inputs.shape
            shuffled_indices = np.random.permutation(T)
            inputs = inputs[:, shuffled_indices]
        test_meter.data_toc()

        # Perform the forward pass.
        preds = model(inputs)
        video_index = meta['video_index'].cuda(non_blocking=True)

        # Gather all the predictions across all the devices to perform ensemble.
        if cfg.NUM_GPUS > 1:
            preds, labels, video_index = du.all_gather(
                [preds, labels, video_index]
            )
        if cfg.NUM_GPUS:
            preds = preds.cpu()
            labels = labels.cpu()
            video_index = video_index.cpu()

        test_meter.iter_toc()
        # Update and log stats.
        test_meter.update_stats(
            preds.detach(), labels.detach(), video_index.detach()
        )
        test_meter.log_iter_stats(cur_iter)
        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    all_preds = test_meter.video_preds.clone().detach()
    all_labels = test_meter.video_labels
    if cfg.NUM_GPUS:
        all_preds = all_preds.cpu()
        all_labels = all_labels.cpu()
    if writer is not None:
        writer.plot_eval(preds=all_preds, labels=all_labels)
    if cfg.ERM_TEST.SAVE_RESULTS_PATH != "":
        save_path = osp.join(cfg.OUTPUT_DIR, cfg.ERM_TEST.SAVE_RESULTS_PATH)
        with PathManager.open(save_path, "wb") as f:
            # BUG FIX: previously dumped [all_labels, all_labels], which saved
            # the labels twice and lost the predictions entirely.
            pickle.dump([all_preds, all_labels], f)
        logger.info(
            "Successfully saved prediction results to {}".format(save_path)
        )
    test_meter.finalize_metrics()
    return test_meter
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.

    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    misc.set_random_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # The dataset must decompose evenly into (videos x views) so the meter can
    # ensemble a fixed number of views per video.
    assert (
        len(test_loader.dataset)
        % (cfg.ERM_TEST.NUM_ENSEMBLE_VIEWS * cfg.ERM_TEST.NUM_SPATIAL_CROPS)
        == 0
    )
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset)
        // (cfg.ERM_TEST.NUM_ENSEMBLE_VIEWS * cfg.ERM_TEST.NUM_SPATIAL_CROPS),
        cfg.ERM_TEST.NUM_ENSEMBLE_VIEWS * cfg.ERM_TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        ensemble_method=cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format (master process only).
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    # BUG FIX: `writer` was previously passed positionally and landed in the
    # `shuffle` parameter of perform_test, so a truthy TensorboardWriter
    # silently enabled random frame shuffling during evaluation while the
    # actual writer inside perform_test stayed None. Pass it by keyword.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer=writer)
    if writer is not None:
        writer.close()