Commit: Added support for TDIUC

erobic committed Aug 14, 2020
1 parent 59bde34 commit ba12f18
Showing 14 changed files with 517 additions and 10 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -0,0 +1,6 @@
# Default ignored files
/workspace.xml
logs/
vqa_experiments/__pycache__
snapshots
*__pycache__
37 changes: 37 additions & 0 deletions README.md
@@ -100,6 +100,16 @@ We save out incremental weights and associated data for REMIND after each evalua
3. Run `run_imagenet_experiment.sh`

## Training REMIND on VQA Datasets
We use the gensen library for question features. Execute the following steps to set it up:
```
cd ${GENSENPATH}
git clone git@github.com:erobic/gensen.git
cd ${GENSENPATH}/data/embedding
chmod +x glove2h5.sh && ./glove2h5.sh
cd ${GENSENPATH}/data/models
chmod +x download_models.sh && ./download_models.sh
```
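
Once set up, question features can be extracted along these lines (a minimal sketch of the gensen API as documented in its README; the model prefix and embedding path below are assumptions, and it must run from inside `${GENSENPATH}`):
```
# Sketch only: encode questions with a single pre-trained gensen model.
from gensen import GenSenSingle

gensen_model = GenSenSingle(
    model_folder='./data/models',
    filename_prefix='nli_large_bothskip',  # assumed model prefix
    pretrained_emb='./data/embedding/glove.840B.300d.h5')

questions = ['What color is the cube?', 'How many objects are there?']
reps_h, reps_h_t = gensen_model.get_representation(
    questions, pool='last', return_numpy=True, tokenize=True)
print(reps_h_t.shape)  # one fixed-size vector per question
```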

### Training REMIND on CLEVR
_Note: For convenience, we pre-extract all the features including the PQ encoded features. This requires 140 GB of free space._
1. Download and extract CLEVR images+annotations:
@@ -130,7 +140,34 @@ _Note: For convenience, we pre-extract all the features including the PQ encoded
- In `pq_encoding_clevr.py`, change the value of `PATH` and `streaming_type` (as either 'iid' or 'qtype')
- Train PQ encoder and extract features: `python vqa_experiments/clevr/pq_encoding_clevr.py` (a sketch of the PQ step follows below)
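
For context, product quantization splits each feature vector into sub-vectors and stores each as a small codebook index, which is what makes keeping all features on disk feasible. A minimal sketch of the idea with faiss (the `M`/`nbits` values are illustrative, not necessarily REMIND's settings; the 1024-d size matches the CLEVR features in this commit):
```
# Sketch of the PQ idea with faiss; M and nbits are illustrative choices.
import faiss
import numpy as np

d, M, nbits = 1024, 32, 8                  # dim, sub-quantizers, bits per code
feats = np.random.rand(10000, d).astype('float32')  # stand-in for real features

pq = faiss.ProductQuantizer(d, M, nbits)
pq.train(feats)
codes = pq.compute_codes(feats)            # (10000, 32) uint8: 32 bytes per vector
recon = pq.decode(codes)                   # lossy reconstruction, shape (10000, d)
print(codes.shape, np.abs(feats - recon).mean())
```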

4. Train REMIND
- Edit `data_path` in `vqa_experiments/configs/config_CLEVR_streaming.py`
- Run `./vqa_experiments/run_clevr_experiment.sh` (Set `DATA_ORDER` to either `qtype` or `iid` to define the data order)

### Training REMIND on TDIUC
1. Download TDIUC
```
cd ${TDIUC_PATH}
wget https://kushalkafle.com/data/TDIUC.zip && unzip TDIUC.zip
cd TDIUC && python setup.py --download Y  # setup.py uses Python 2-style prints; change them to print(...) or run: 2to3 -w -f print setup.py
```

2. Extract question features
- Edit `vqa_experiments/tdiuc/extract_question_features_tdiuc.py`, changing `DATA_PATH` to point to the TDIUC dataset and `GENSEN_PATH` to the gensen repository, then extract the features:
`python vqa_experiments/tdiuc/extract_question_features_tdiuc.py`

- Pre-process the TDIUC questions
Edit the `PATH` variable in `vqa_experiments/tdiuc/preprocess_tdiuc.py`, pointing it to the directory where TDIUC was extracted, then run the script.

3. Extract image features, train PQ encoder and extract encoded features
- Extract image features: `python -u vqa_experiments/tdiuc/extract_image_features_tdiuc.py --path /path/to/TDIUC`
- In `pq_encoding_tdiuc.py`, change the value of `PATH` and `streaming_type` (as either 'iid' or 'qtype')
- Train PQ encoder and extract features: `python vqa_experiments/tdiuc/pq_encoding_tdiuc.py`

4. Train REMIND
- Edit `data_path` in `vqa_experiments/configs/config_TDIUC_streaming.py`
- Run `./vqa_experiments/run_tdiuc_experiment.sh` (Set `DATA_ORDER` to either `qtype` or `iid` to define the data order)


## Citation
If using this code, please cite our paper.
6 changes: 4 additions & 2 deletions vqa_experiments/clevr/pq_encoding_clevr.py
@@ -6,11 +6,13 @@
import h5py
import json

-PATH = '/hdd/robik/CLEVR'
+# Change these based on data set
+PATH = '/hdd/robik/CLEVR'  # Change this
+streaming_type = 'iid'  # Change this

feat_name = f'{PATH}/all_clevr_resnet_largestage3'
train_filename = f'{PATH}/train_clevr.h5'
lut_name = f'{PATH}/map_clevr_resnet_largestage3.json'
-streaming_type = 'iid'

feat_dim = 1024
num_feat_maps = 196
2 changes: 1 addition & 1 deletion vqa_experiments/clevr/preprocess_clevr.py
@@ -55,7 +55,7 @@
most_common = Counter(meta[m]).most_common()
lut[f'{m}2idx'] = {a[0]: idx for idx, a in enumerate(most_common)}

-json.dump(lut, open('LUT_clevr.json', 'w'))
+json.dump(lut, open(f'{LUT_tdiuc}/LUT_clevr.json', 'w'))
# %%
dt = h5py.special_dtype(vlen=str)
for split in ['train', 'val']:
4 changes: 1 addition & 3 deletions vqa_experiments/configs/config_CLEVR_streaming.py
@@ -32,8 +32,6 @@
test_on = 'full' # 'full' or 'valid'

arrangement = dict()
-arrangement['train'] = 'random'  # 'random', 'aidx', 'atypeidx', 'qtypeidx'
-arrangement['val'] = 'random'  # 'random', 'aidx', 'atypeidx', 'qtypeidx'

# How many to train/test on
# How many of "indices" to train on, E.g., if arrangement is ans_class, it refers
@@ -83,7 +81,7 @@
num_hidden = 1024
use_model = s_mac.sMacNetwork # BLAH
optimizer = torch.optim.Adamax
-lr = 1e-4
+lr = 3e-4
save_models = False
if not soft_targets:
    train_on = 'valid'
81 changes: 81 additions & 0 deletions vqa_experiments/configs/config_TDIUC_streaming.py
@@ -0,0 +1,81 @@
"""
Written by Kushal, modified by Robik
"""
import vqa_experiments.vqa_models as vqa_models
import torch
from vqa_experiments.dictionary import Dictionary
import sys
from vqa_experiments.vqa_models import WordEmbedding
from vqa_experiments.s_mac import s_mac

# Model and runtime configuration choices. A copy of this will be saved along with the model
# weights so that it is easy to reproduce later.

# Data
data_path = '/hdd/robik/TDIUC'
dataset = 'tdiuc'
img_feat = 'resnetpq_iid' # updn, resnet, updnmkii, resnetmkii
mkii = False # If you want to also load codebook indices
data_subset = 1.0
d = Dictionary.load_from_file(f'vqa_experiments/data/dictionary_{dataset}.pkl')

map_path = f'{data_path}/map_tdiuc_resnet.json'

train_file = f'{data_path}/train_{dataset}.h5'
val_file = f'{data_path}/val_{dataset}.h5'

train_batch_size = 512
val_batch_size = 512
num_classes = 1480  # Number of classifier units: 1480 for TDIUC, 31xx for VQA, 28 for CLEVR

train_on = 'full'
test_on = 'full' # 'full' or 'valid'

arrangement = dict()

only_first_k = dict()
only_first_k['train'] = sys.maxsize # Use sys.maxsize to load all
only_first_k['val'] = sys.maxsize # Use sys.maxsize to load all

qnorm = True # Normalize ques feat?
imnorm = True # Normalize img feat?

shuffle = False

fetch_all = False

if fetch_all:  # For ques_type, ans_class or ans_type arrangement, get all qualifying data
    assert (not shuffle)
    train_batch_size = 1
    val_batch_size = 1  # Dataset[i] will return all qualifying data of idx 1

load_in_memory = False
use_all = False
use_pooled = False
use_lstm = True

# Training
overwrite_expt_dir = True # Set to True during dev phase
max_epochs = 20
test_interval = 8

# Model
attn_type = 'old' # new or old
num_attn_hops = 2
soft_targets = False
bidirectional = True
lstm_out = 512
emb_dim = 300
cnn_feat_size = 2048 # 2048 for resnet/updn/clevr_layer4 ; 1024 for clevr layer_3

classfier_dropout = True
embedding_dropout = True
attention_dropout = True
num_hidden = 1024
use_model = vqa_models.UpDown # BLAH
optimizer = torch.optim.Adamax
lr = 2e-3
save_models = False
if not soft_targets:
    train_on = 'valid'
num_rehearsal_samples = 50
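
For orientation, `--config_name TDIUC_streaming` presumably selects this module by its filename suffix; a plausible loading sketch (an assumption, not the actual code in `vqa_trainer.py`):
```
# Assumed config resolution; the real logic lives in vqa_experiments/vqa_trainer.py.
import importlib

def load_config(config_name):
    return importlib.import_module(f'vqa_experiments.configs.config_{config_name}')

config = load_config('TDIUC_streaming')
print(config.num_classes, config.lr)  # 1480 0.002
```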
34 changes: 34 additions & 0 deletions vqa_experiments/metric.py
@@ -66,7 +66,41 @@ def compute_clevr_per_type_accuracies(path, preds):
    print(some_qids)


def compute_tdiuc_accuracy(PATH, preds):
    gt_answers = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['aidx'][:]
    gt_qids = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['qid'][:]
    gt_qtypes = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['qtypeidx'][:]

    qid2qtype = {qid: gt for qid, gt in zip(gt_qids, gt_qtypes)}
    qid2gt = {qid: gt for qid, gt in zip(gt_qids, gt_answers)}

    acc = defaultdict(list)

    for qid in qid2gt:
        gt = qid2gt[qid]
        qtype = qid2qtype[qid]
        if gt == preds[str(qid)]:
            acc['overall'].append(1)
            acc[qtype].append(1)
        else:
            acc['overall'].append(0)
            acc[qtype].append(0)

    mpt = 0
    overall = 0
    for k in acc:
        if k == 'overall':
            overall = sum(acc[k]) / len(acc[k])
        else:
            mpt += sum(acc[k]) / len(acc[k])
    mpt = mpt / 12  # TDIUC has 12 question types

    return mpt, overall


def compute_accuracy(path, dataset, preds):
    if dataset == 'clevr':
        mpt, overall = compute_clevr_accuracy(path, preds)
    elif dataset == 'tdiuc':
        mpt, overall = compute_tdiuc_accuracy(path, preds)
    print(f"Mean Per Type: {mpt}, Overall: {overall}")
2 changes: 1 addition & 1 deletion vqa_experiments/run_clevr_experiment.sh
@@ -10,7 +10,7 @@ export PYTHONPATH=/hdd/robik/projects/REMIND
#--expt_name ${expt} \
#--stream_with_rehearsal \
#--lr ${lr} &> logs/${expt}.log &
-DATA_ORDER=iid # or qtype
+DATA_ORDER=qtype # or iid
expt=${CONFIG}_${DATA_ORDER}_${lr}

CUDA_VISIBLE_DEVICES=0 python -u vqa_experiments/vqa_trainer.py \
26 changes: 26 additions & 0 deletions vqa_experiments/run_tdiuc_experiment.sh
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#source activate remind_proj

lr=2e-3
CONFIG=TDIUC_streaming
export PYTHONPATH=/hdd/robik/projects/REMIND

#DATA_ORDER=iid
#expt=${CONFIG}_${DATA_ORDER}_${lr}

#CUDA_VISIBLE_DEVICES=0 nohup python -u vqa_experiments/vqa_trainer.py \
#--config_name ${CONFIG} \
#--expt_name ${expt} \
#--stream_with_rehearsal \
#--data_order ${DATA_ORDER} \
#--lr ${lr} &> logs/${expt}.log &

DATA_ORDER=qtype # or iid
expt=${CONFIG}_${DATA_ORDER}_${lr}

CUDA_VISIBLE_DEVICES=0 python -u vqa_experiments/vqa_trainer.py \
--config_name ${CONFIG} \
--expt_name ${expt} \
--stream_with_rehearsal \
--data_order ${DATA_ORDER} \
--lr ${lr}
122 changes: 122 additions & 0 deletions vqa_experiments/tdiuc/extract_image_features_tdiuc.py
@@ -0,0 +1,122 @@
import argparse, os
import h5py
import numpy as np
from scipy.misc import imread, imresize  # requires scipy < 1.2, where these still exist

import torch
import torchvision
import json

parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, default='/hdd/robik/TDIUC')
parser.add_argument('--max_images', default=None, type=int)

parser.add_argument('--image_height', default=224, type=int)
parser.add_argument('--image_width', default=224, type=int)

parser.add_argument('--model', default='resnet152')
parser.add_argument('--model_stage', default=4, type=int)
parser.add_argument('--batch_size', default=128, type=int)


def build_model(args):
    if not hasattr(torchvision.models, args.model):
        raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')
    cnn = getattr(torchvision.models, args.model)(pretrained=True)
    # Keep the stem plus the first `model_stage` residual stages; drop pooling/fc.
    layers = [
        cnn.conv1,
        cnn.bn1,
        cnn.relu,
        cnn.maxpool,
    ]
    for i in range(args.model_stage):
        name = 'layer%d' % (i + 1)
        layers.append(getattr(cnn, name))
    model = torch.nn.Sequential(*layers)
    model.cuda()
    model.eval()
    return model


def run_batch(cur_batch, model):
    # Standard ImageNet normalization (the last std value should be 0.225).
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()
    with torch.no_grad():
        feats = model(image_batch)
        feats = feats.data.cpu().clone().numpy()
    return feats


def path2iid(path):
    # COCO-style names like COCO_train2014_000000262148.jpg -> 262148
    return int(path.split('/')[-1].split('.')[0].split('_')[-1])


def main(args):
    args.output_h5_file = args.path + "/all_tdiuc_resnet.h5"
    p1 = f'{args.path}/Images/train2014'
    input_paths = [os.path.join(p1, a) for a in os.listdir(p1)]

    p1 = f'{args.path}/Images/val2014'
    input_paths.extend([os.path.join(p1, a) for a in os.listdir(p1)])

    model = build_model(args)
    img_size = (args.image_height, args.image_width)
    with h5py.File(args.output_h5_file, 'w') as f:
        feat_dset = None
        i0 = 0
        cur_batch = []
        iid = []
        for i, path in enumerate(input_paths):
            iid.append(path2iid(path))
            img = imread(path, mode='RGB')
            img = imresize(img, img_size, interp='bicubic')
            img = img.transpose(2, 0, 1)[None]
            cur_batch.append(img)
            if len(cur_batch) == args.batch_size:
                feats = run_batch(cur_batch, model)
                if feat_dset is None:
                    N = len(input_paths)
                    _, C, H, W = feats.shape
                    feat_dset = f.create_dataset('image_features', (N, H * W, C),
                                                 dtype=np.float32)
                    iid_dset = f.create_dataset('iids', (N,),
                                                dtype=np.int64)

                i1 = i0 + len(cur_batch)
                feats_r = feats.reshape(-1, 2048, 49)
                feat_dset[i0:i1] = np.transpose(feats_r, (0, 2, 1))
                i0 = i1
                print('Processed %d / %d images' % (i1, len(input_paths)))
                cur_batch = []

        if len(cur_batch) > 0:
            feats = run_batch(cur_batch, model)
            feats_r = feats.reshape(-1, 2048, 49)
            i1 = i0 + len(cur_batch)
            feat_dset[i0:i1] = np.transpose(feats_r, (0, 2, 1))
            print('Processed %d / %d images' % (i1, len(input_paths)))
        iid_dset[:len(iid)] = np.array(iid, dtype=np.int64)

    feat_file = h5py.File(args.output_h5_file, 'r')

    iid_list = feat_file['iids'][:]

    iid2idx = {str(iid): idx for idx, iid in enumerate(iid_list)}
    idx2iid = {idx: str(iid) for idx, iid in enumerate(iid_list)}

    lut = dict()
    lut['image_id_to_ix'] = iid2idx
    lut['image_ix_to_id'] = idx2iid

    json.dump(lut, open(f'{args.path}/map_tdiuc_resnet.json', 'w'))


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
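
Downstream code can then fetch one image's features through the LUT this script writes (a sketch using only the files produced above; the image id is illustrative):
```
# Sketch: read back one feature map via map_tdiuc_resnet.json.
import h5py
import json

path = '/hdd/robik/TDIUC'                 # same --path as the extractor
lut = json.load(open(f'{path}/map_tdiuc_resnet.json'))

with h5py.File(f'{path}/all_tdiuc_resnet.h5', 'r') as f:
    ix = lut['image_id_to_ix']['262148']  # illustrative image id
    feats = f['image_features'][ix]       # (49, 2048): a 7x7 grid of vectors
print(feats.shape)
```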
