-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
517 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Default ignored files | ||
/workspace.xml | ||
logs/ | ||
vqa_experiments/__pycache__ | ||
snapshots | ||
*__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
""" | ||
Written by Kushal, modified by Robik | ||
""" | ||
import vqa_experiments.vqa_models as vqa_models | ||
import torch | ||
from vqa_experiments.dictionary import Dictionary | ||
import sys | ||
from vqa_experiments.vqa_models import WordEmbedding | ||
from vqa_experiments.s_mac import s_mac | ||
|
||
# Model and runtime configuration choices. A copy of this will be saved along with the model
# weights so that it is easy to reproduce later.

# Data
data_path = '/hdd/robik/TDIUC'
dataset = 'tdiuc'
img_feat = 'resnetpq_iid'  # updn, resnet, updnmkii, resnetmkii
mkii = False  # If you want to also load codebook indices
data_subset = 1.0
d = Dictionary.load_from_file(f'vqa_experiments/data/dictionary_{dataset}.pkl')

# JSON lookup table between image ids and feature-row indices.
map_path = f'{data_path}/map_tdiuc_resnet.json'

train_file = f'{data_path}/train_{dataset}.h5'
val_file = f'{data_path}/val_{dataset}.h5'

train_batch_size = 512
val_batch_size = 512
num_classes = 1480  # Number of classifier units 1480 for TDIUC, 31xx for VQA,28 for CLEVR

train_on = 'full'
test_on = 'full'  # 'full' or 'valid'

arrangement = dict()

only_first_k = dict()
only_first_k['train'] = sys.maxsize  # Use sys.maxsize to load all
only_first_k['val'] = sys.maxsize  # Use sys.maxsize to load all

qnorm = True  # Normalize ques feat?
imnorm = True  # Normalize img feat?

shuffle = False

fetch_all = False

if fetch_all:  # For ques_type, ans_class or ans_type arrangement, get all qualifying data
    # Fetching whole groups is incompatible with shuffling.
    assert (not shuffle)
    train_batch_size = 1
    val_batch_size = 1  # Dataset[i] will return all qualifying data of idx i

load_in_memory = False
use_all = False
use_pooled = False
use_lstm = True

# Training
overwrite_expt_dir = True  # Set to True during dev phase
max_epochs = 20
test_interval = 8

# Model
attn_type = 'old'  # new or old
num_attn_hops = 2
soft_targets = False
bidirectional = True
lstm_out = 512
emb_dim = 300
cnn_feat_size = 2048  # 2048 for resnet/updn/clevr_layer4 ; 1024 for clevr layer_3

# NOTE: 'classfier_dropout' is misspelled, but the name is kept as-is because
# code outside this file may read it under this exact spelling.
classfier_dropout = True
embedding_dropout = True
attention_dropout = True
num_hidden = 1024
use_model = vqa_models.UpDown  # Model class taken from vqa_experiments.vqa_models
optimizer = torch.optim.Adamax
lr = 2e-3
save_models = False
if not soft_targets:
    # Overrides the train_on = 'full' default set above.
    train_on = 'valid'

# NOTE(review): defined unconditionally here; with soft_targets = False this is
# equivalent to defining it inside the branch above -- confirm intended nesting.
num_rehearsal_samples = 50
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env bash
# Launches vqa_experiments/vqa_trainer.py in streaming-with-rehearsal mode
# using the TDIUC_streaming config on GPU 0.
#source activate remind_proj

lr=2e-3
CONFIG=TDIUC_streaming
export PYTHONPATH=/hdd/robik/projects/REMIND

# Alternative run (disabled): iid data order, started in the background with
# nohup and with all output redirected to logs/<expt>.log.
#DATA_ORDER=iid
#expt=${CONFIG}_${DATA_ORDER}_${lr}

#CUDA_VISIBLE_DEVICES=0 nohup python -u vqa_experiments/vqa_trainer.py \
#--config_name ${CONFIG} \
#--expt_name ${expt} \
#--stream_with_rehearsal \
#--data_order ${DATA_ORDER} \
#--lr ${lr} &> logs/${expt}.log &

DATA_ORDER=qtype # iid or qtype
expt="${CONFIG}_${DATA_ORDER}_${lr}"

# Foreground run; expansions are quoted so values are passed as single words.
CUDA_VISIBLE_DEVICES=0 python -u vqa_experiments/vqa_trainer.py \
    --config_name "${CONFIG}" \
    --expt_name "${expt}" \
    --stream_with_rehearsal \
    --data_order "${DATA_ORDER}" \
    --lr "${lr}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import argparse, os | ||
import h5py | ||
import numpy as np | ||
from scipy.misc import imread, imresize | ||
|
||
import torch | ||
import torchvision | ||
import json | ||
|
||
# Command-line options for the TDIUC ResNet feature-extraction script.
parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, default='/hdd/robik/TDIUC',
                    help='Dataset root; images are read from <path>/Images/train2014 and '
                         '<path>/Images/val2014, outputs are written directly under <path>')
# Intended as an optional cap on the number of images; whether it takes effect
# depends on main().
parser.add_argument('--max_images', default=None, type=int)

parser.add_argument('--image_height', default=224, type=int,
                    help='Height every image is resized to before the CNN')
parser.add_argument('--image_width', default=224, type=int,
                    help='Width every image is resized to before the CNN')

parser.add_argument('--model', default='resnet152',
                    help='Name of a ResNet constructor in torchvision.models')
parser.add_argument('--model_stage', default=4, type=int,
                    help='Number of residual stages (layer1..layerN) kept in the extractor')
parser.add_argument('--batch_size', default=128, type=int,
                    help='Number of images pushed through the CNN at once')
|
||
|
||
def build_model(args):
    """Build a truncated, pretrained torchvision ResNet for feature extraction.

    Args:
        args: Namespace providing ``model`` (name of a constructor in
            ``torchvision.models``; must contain ``'resnet'``) and
            ``model_stage`` (how many of ``layer1`` .. ``layerN`` are kept
            after the conv1/bn1/relu/maxpool stem).

    Returns:
        A ``torch.nn.Sequential`` of the selected layers, moved to the GPU
        and switched to eval mode.

    Raises:
        ValueError: If ``args.model`` is not in ``torchvision.models`` or is
            not a ResNet variant.
    """
    if not hasattr(torchvision.models, args.model):
        raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')
    cnn = getattr(torchvision.models, args.model)(pretrained=True)
    # Stem shared by every ResNet variant.
    layers = [
        cnn.conv1,
        cnn.bn1,
        cnn.relu,
        cnn.maxpool,
    ]
    # Residual stages are attributes named layer1, layer2, ...
    layers.extend(
        getattr(cnn, 'layer%d' % stage)
        for stage in range(1, args.model_stage + 1)
    )
    model = torch.nn.Sequential(*layers)
    model.cuda()
    model.eval()
    return model
|
||
|
||
def run_batch(cur_batch, model):
    """Normalize a batch of images and run it through ``model``.

    Args:
        cur_batch: List of arrays that concatenate along axis 0 into a
            ``(N, 3, H, W)`` batch. Values are divided by 255 here, so they
            are expected on a 0-255 scale.
        model: A callable ``torch.nn.Module``. The batch is placed on the
            device of the model's first parameter; a parameter-free model
            falls back to CUDA when available, otherwise CPU.

    Returns:
        The model output as a NumPy array on the host.
    """
    # Per-channel ImageNet statistics used by torchvision's pretrained models.
    # The third std entry is 0.225 (the original 0.224 was a typo).
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std

    # Follow the model's device instead of hard-coding CUDA so the function
    # also works for CPU models; GPU models behave exactly as before.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    image_batch = torch.as_tensor(image_batch, dtype=torch.float32, device=device)
    with torch.no_grad():
        feats = model(image_batch)
    return feats.detach().cpu().numpy()
|
||
|
||
def path2iid(path):
    """Return the integer image id encoded in an image file name.

    The id is taken from the last ``/``-separated component of ``path``:
    everything from the first ``.`` onwards is dropped and the text after the
    last ``_`` is parsed as an integer, e.g.
    ``'Images/train2014/COCO_train2014_000000000009.jpg'`` -> ``9``.

    Raises:
        ValueError: If that trailing piece is not a valid integer.
    """
    file_name = path.split('/')[-1]
    stem = file_name.split('.')[0]
    return int(stem.split('_')[-1])
|
||
|
||
def _store_batch(h5_file, feat_dset, feats, start, total):
    """Write one batch of CNN features into the ``image_features`` dataset.

    The dataset is created on the first call, because its trailing dimensions
    are only known once the model has produced an output.

    Returns:
        ``(feat_dset, end)``: the (possibly newly created) dataset and the row
        index following the last row written.
    """
    n, C, H, W = feats.shape
    if feat_dset is None:
        feat_dset = h5_file.create_dataset('image_features', (total, H * W, C),
                                           dtype=np.float32)
    end = start + n
    # (n, C, H, W) -> (n, H*W, C); sizes come from the actual output rather
    # than being hard-coded, so any model/model_stage/image size works.
    feat_dset[start:end] = np.transpose(feats.reshape(n, C, H * W), (0, 2, 1))
    print('Processed %d / %d images' % (end, total))
    return feat_dset, end


def main(args):
    """Extract CNN features for the TDIUC images and write the lookup table.

    Reads every file in ``<path>/Images/train2014`` and
    ``<path>/Images/val2014``, resizes it to
    ``(image_height, image_width)``, runs it through the model from
    ``build_model`` and stores:

    * ``<path>/all_tdiuc_resnet.h5`` with datasets ``image_features``
      (one ``(H*W, C)`` row block per image) and ``iids`` (image ids), and
    * ``<path>/map_tdiuc_resnet.json`` mapping image ids to row indices
      (``image_id_to_ix``) and back (``image_ix_to_id``).

    Args:
        args: Parsed command-line namespace. ``args.output_h5_file`` is set
            here as a side effect. If ``args.max_images`` is present and not
            ``None``, only that many images are processed.

    NOTE(review): ``imread``/``imresize`` come from ``scipy.misc``, which
    recent SciPy releases no longer ship -- confirm the pinned SciPy version.
    """
    args.output_h5_file = args.path + "/all_tdiuc_resnet.h5"

    input_paths = []
    for split in ('train2014', 'val2014'):
        split_dir = f'{args.path}/Images/{split}'
        input_paths.extend(os.path.join(split_dir, name)
                           for name in os.listdir(split_dir))

    # Honour the --max_images option, which was previously parsed but ignored.
    max_images = getattr(args, 'max_images', None)
    if max_images is not None:
        input_paths = input_paths[:max_images]

    model = build_model(args)
    img_size = (args.image_height, args.image_width)
    total = len(input_paths)
    iids = []
    with h5py.File(args.output_h5_file, 'w') as f:
        # Created up-front so it exists even when fewer than batch_size images
        # are processed (the first full batch used to be the only creator).
        iid_dset = f.create_dataset('iids', (total,), dtype=np.int64)
        feat_dset = None
        i0 = 0
        cur_batch = []
        for path in input_paths:
            iids.append(path2iid(path))
            img = imread(path, mode='RGB')
            img = imresize(img, img_size, interp='bicubic')
            # HWC -> CHW, plus a leading batch axis for concatenation.
            cur_batch.append(img.transpose(2, 0, 1)[None])
            if len(cur_batch) == args.batch_size:
                feat_dset, i0 = _store_batch(
                    f, feat_dset, run_batch(cur_batch, model), i0, total)
                cur_batch = []

        # Flush the final, partially filled batch.
        if cur_batch:
            feat_dset, i0 = _store_batch(
                f, feat_dset, run_batch(cur_batch, model), i0, total)
        if iids:
            iid_dset[:] = np.array(iids, dtype=np.int64)

    # Row i of the feature file belongs to image iids[i]; the JSON encoder
    # turns the integer keys of image_ix_to_id into strings.
    lut = {
        'image_id_to_ix': {str(iid): idx for idx, iid in enumerate(iids)},
        'image_ix_to_id': {idx: str(iid) for idx, iid in enumerate(iids)},
    }
    with open(f'{args.path}/map_tdiuc_resnet.json', 'w') as map_file:
        json.dump(lut, map_file)
|
||
|
||
if __name__ == '__main__':
    # Script entry point: parse the command line and run feature extraction.
    args = parser.parse_args()
    main(args)
Oops, something went wrong.