Commit: Added support for TDIUC

erobic committed Aug 14, 2020
1 parent 59bde34 commit ba12f18
Showing 14 changed files with 517 additions and 10 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -0,0 +1,6 @@
# Default ignored files
/workspace.xml
logs/
vqa_experiments/__pycache__
snapshots
*__pycache__
37 changes: 37 additions & 0 deletions README.md
@@ -100,6 +100,16 @@ We save out incremental weights and associated data for REMIND after each evalua
3. Run `run_imagenet_experiment.sh`

## Training REMIND on VQA Datasets
We use the gensen library for question features. Execute the following steps to set it up:
```
cd ${GENSENPATH}
git clone git@github.com:erobic/gensen.git
cd ${GENSENPATH}/data/embedding
chmod +x glove2h5.sh && ./glove2h5.sh
cd ${GENSENPATH}/data/models
chmod +x download_models.sh && ./download_models.sh
```
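
Once set up, question features can be extracted along these lines (a minimal sketch of the gensen API as documented in its README; the model prefix and embedding path below are assumptions, and it must run from inside `${GENSENPATH}`):
```
# Sketch only: encode questions with a single pre-trained gensen model.
from gensen import GenSenSingle

gensen_model = GenSenSingle(
    model_folder='./data/models',
    filename_prefix='nli_large_bothskip',  # assumed model prefix
    pretrained_emb='./data/embedding/glove.840B.300d.h5')

questions = ['What color is the cube?', 'How many objects are there?']
reps_h, reps_h_t = gensen_model.get_representation(
    questions, pool='last', return_numpy=True, tokenize=True)
print(reps_h_t.shape)  # one fixed-size vector per question
```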

### Training REMIND on CLEVR
_Note: For convenience, we pre-extract all the features including the PQ encoded features. This requires 140 GB of free space._
1. Download and extract CLEVR images+annotations:
@@ -130,7 +140,34 @@ _Note: For convenience, we pre-extract all the features including the PQ encoded
- In `pq_encoding_clevr.py`, change the value of `PATH` and `streaming_type` (as either 'iid' or 'qtype')
- Train PQ encoder and extract features: `python vqa_experiments/clevr/pq_encoding_clevr.py` (a sketch of the PQ step follows below)
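
For context, product quantization splits each feature vector into sub-vectors and stores each as a small codebook index, which is what makes keeping all features on disk feasible. A minimal sketch of the idea with faiss (the `M`/`nbits` values are illustrative, not necessarily REMIND's settings; the 1024-d size matches the CLEVR features in this commit):
```
# Sketch of the PQ idea with faiss; M and nbits are illustrative choices.
import faiss
import numpy as np

d, M, nbits = 1024, 32, 8                  # dim, sub-quantizers, bits per code
feats = np.random.rand(10000, d).astype('float32')  # stand-in for real features

pq = faiss.ProductQuantizer(d, M, nbits)
pq.train(feats)
codes = pq.compute_codes(feats)            # (10000, 32) uint8: 32 bytes per vector
recon = pq.decode(codes)                   # lossy reconstruction, shape (10000, d)
print(codes.shape, np.abs(feats - recon).mean())
```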

4. Train REMIND
- Edit `data_path` in `vqa_experiments/configs/config_CLEVR_streaming.py`
- Run `./vqa_experiments/run_clevr_experiment.sh` (Set `DATA_ORDER` to either `qtype` or `iid` to define the data order)

### Training REMIND on TDIUC
1. Download TDIUC
```
cd ${TDIUC_PATH}
wget https://kushalkafle.com/data/TDIUC.zip && unzip TDIUC.zip
cd TDIUC && python setup.py --download Y  # setup.py uses Python 2-style prints; change them to print(...) or run: 2to3 -w -f print setup.py
```

2. Extract question features
- Edit `vqa_experiments/tdiuc/extract_question_features_tdiuc.py`, changing `DATA_PATH` to point to the TDIUC dataset and `GENSEN_PATH` to the gensen repository, then extract the features:
`python vqa_experiments/tdiuc/extract_question_features_tdiuc.py`

- Pre-process the TDIUC questions
Edit the `PATH` variable in `vqa_experiments/tdiuc/preprocess_tdiuc.py`, pointing it to the directory where TDIUC was extracted, then run the script.

3. Extract image features, train PQ encoder and extract encoded features
- Extract image features: `python -u vqa_experiments/tdiuc/extract_image_features_tdiuc.py --path /path/to/TDIUC`
- In `pq_encoding_tdiuc.py`, change the value of `PATH` and `streaming_type` (as either 'iid' or 'qtype')
- Train PQ encoder and extract features: `python vqa_experiments/tdiuc/pq_encoding_tdiuc.py`

4. Train REMIND
- Edit `data_path` in `vqa_experiments/configs/config_TDIUC_streaming.py`
- Run `./vqa_experiments/run_tdiuc_experiment.sh` (Set `DATA_ORDER` to either `qtype` or `iid` to define the data order)


## Citation
If using this code, please cite our paper.
6 changes: 4 additions & 2 deletions vqa_experiments/clevr/pq_encoding_clevr.py
@@ -6,11 +6,13 @@
import h5py
import json

-PATH = '/hdd/robik/CLEVR'
+# Change these based on data set
+PATH = '/hdd/robik/CLEVR'  # Change this
+streaming_type = 'iid'  # Change this

feat_name = f'{PATH}/all_clevr_resnet_largestage3'
train_filename = f'{PATH}/train_clevr.h5'
lut_name = f'{PATH}/map_clevr_resnet_largestage3.json'
-streaming_type = 'iid'

feat_dim = 1024
num_feat_maps = 196
2 changes: 1 addition & 1 deletion vqa_experiments/clevr/preprocess_clevr.py
@@ -55,7 +55,7 @@
most_common = Counter(meta[m]).most_common()
lut[f'{m}2idx'] = {a[0]: idx for idx, a in enumerate(most_common)}

-json.dump(lut, open('LUT_clevr.json', 'w'))
+json.dump(lut, open(f'{LUT_tdiuc}/LUT_clevr.json', 'w'))
# %%
dt = h5py.special_dtype(vlen=str)
for split in ['train', 'val']:
4 changes: 1 addition & 3 deletions vqa_experiments/configs/config_CLEVR_streaming.py
@@ -32,8 +32,6 @@
test_on = 'full' # 'full' or 'valid'

arrangement = dict()
-arrangement['train'] = 'random'  # 'random', 'aidx', 'atypeidx', 'qtypeidx'
-arrangement['val'] = 'random'  # 'random', 'aidx', 'atypeidx', 'qtypeidx'

# How many to train/test on
# How many of "indices" to train on, E.g., if arrangement is ans_class, it refers
@@ -83,7 +81,7 @@
num_hidden = 1024
use_model = s_mac.sMacNetwork # BLAH
optimizer = torch.optim.Adamax
-lr = 1e-4
+lr = 3e-4
save_models = False
if not soft_targets:
    train_on = 'valid'
81 changes: 81 additions & 0 deletions vqa_experiments/configs/config_TDIUC_streaming.py
@@ -0,0 +1,81 @@
"""
Written by Kushal, modified by Robik
"""
import vqa_experiments.vqa_models as vqa_models
import torch
from vqa_experiments.dictionary import Dictionary
import sys
from vqa_experiments.vqa_models import WordEmbedding
from vqa_experiments.s_mac import s_mac

# Model and runtime configuration choices. A copy of this will be saved along with the model
# weights so that it is easy to reproduce later.

# Data
data_path = '/hdd/robik/TDIUC'
dataset = 'tdiuc'
img_feat = 'resnetpq_iid' # updn, resnet, updnmkii, resnetmkii
mkii = False # If you want to also load codebook indices
data_subset = 1.0
d = Dictionary.load_from_file(f'vqa_experiments/data/dictionary_{dataset}.pkl')

map_path = f'{data_path}/map_tdiuc_resnet.json'

train_file = f'{data_path}/train_{dataset}.h5'
val_file = f'{data_path}/val_{dataset}.h5'

train_batch_size = 512
val_batch_size = 512
num_classes = 1480  # Number of classifier units: 1480 for TDIUC, 31xx for VQA, 28 for CLEVR

train_on = 'full'
test_on = 'full' # 'full' or 'valid'

arrangement = dict()

only_first_k = dict()
only_first_k['train'] = sys.maxsize # Use sys.maxsize to load all
only_first_k['val'] = sys.maxsize # Use sys.maxsize to load all

qnorm = True # Normalize ques feat?
imnorm = True # Normalize img feat?

shuffle = False

fetch_all = False

if fetch_all:  # For ques_type, ans_class or ans_type arrangement, get all qualifying data
    assert (not shuffle)
    train_batch_size = 1
    val_batch_size = 1  # Dataset[i] will return all qualifying data of idx 1

load_in_memory = False
use_all = False
use_pooled = False
use_lstm = True

# Training
overwrite_expt_dir = True # Set to True during dev phase
max_epochs = 20
test_interval = 8

# Model
attn_type = 'old' # new or old
num_attn_hops = 2
soft_targets = False
bidirectional = True
lstm_out = 512
emb_dim = 300
cnn_feat_size = 2048 # 2048 for resnet/updn/clevr_layer4 ; 1024 for clevr layer_3

classfier_dropout = True
embedding_dropout = True
attention_dropout = True
num_hidden = 1024
use_model = vqa_models.UpDown # BLAH
optimizer = torch.optim.Adamax
lr = 2e-3
save_models = False
if not soft_targets:
    train_on = 'valid'
num_rehearsal_samples = 50
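
For orientation, `--config_name TDIUC_streaming` presumably selects this module by its filename suffix; a plausible loading sketch (an assumption, not the actual code in `vqa_trainer.py`):
```
# Assumed config resolution; the real logic lives in vqa_experiments/vqa_trainer.py.
import importlib

def load_config(config_name):
    return importlib.import_module(f'vqa_experiments.configs.config_{config_name}')

config = load_config('TDIUC_streaming')
print(config.num_classes, config.lr)  # 1480 0.002
```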
34 changes: 34 additions & 0 deletions vqa_experiments/metric.py
@@ -66,7 +66,41 @@ def compute_clevr_per_type_accuracies(path, preds):
    print(some_qids)


def compute_tdiuc_accuracy(PATH, preds):
    gt_answers = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['aidx'][:]
    gt_qids = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['qid'][:]
    gt_qtypes = h5py.File(f'{PATH}/val_tdiuc.h5', 'r')['qtypeidx'][:]

    qid2qtype = {qid: gt for qid, gt in zip(gt_qids, gt_qtypes)}
    qid2gt = {qid: gt for qid, gt in zip(gt_qids, gt_answers)}

    acc = defaultdict(list)

    for qid in qid2gt:
        gt = qid2gt[qid]
        qtype = qid2qtype[qid]
        if gt == preds[str(qid)]:
            acc['overall'].append(1)
            acc[qtype].append(1)
        else:
            acc['overall'].append(0)
            acc[qtype].append(0)

    mpt = 0
    overall = 0
    for k in acc:
        if k == 'overall':
            overall = sum(acc[k]) / len(acc[k])
        else:
            mpt += sum(acc[k]) / len(acc[k])
    mpt = mpt / 12  # TDIUC has 12 question types

    return mpt, overall


def compute_accuracy(path, dataset, preds):
    if dataset == 'clevr':
        mpt, overall = compute_clevr_accuracy(path, preds)
    elif dataset == 'tdiuc':
        mpt, overall = compute_tdiuc_accuracy(path, preds)
    print(f"Mean Per Type: {mpt}, Overall: {overall}")
2 changes: 1 addition & 1 deletion vqa_experiments/run_clevr_experiment.sh
@@ -10,7 +10,7 @@ export PYTHONPATH=/hdd/robik/projects/REMIND
#--expt_name ${expt} \
#--stream_with_rehearsal \
#--lr ${lr} &> logs/${expt}.log &
-DATA_ORDER=iid # or qtype
+DATA_ORDER=qtype # or iid
expt=${CONFIG}_${DATA_ORDER}_${lr}

CUDA_VISIBLE_DEVICES=0 python -u vqa_experiments/vqa_trainer.py \
26 changes: 26 additions & 0 deletions vqa_experiments/run_tdiuc_experiment.sh
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#source activate remind_proj

lr=2e-3
CONFIG=TDIUC_streaming
export PYTHONPATH=/hdd/robik/projects/REMIND

#DATA_ORDER=iid
#expt=${CONFIG}_${DATA_ORDER}_${lr}

#CUDA_VISIBLE_DEVICES=0 nohup python -u vqa_experiments/vqa_trainer.py \
#--config_name ${CONFIG} \
#--expt_name ${expt} \
#--stream_with_rehearsal \
#--data_order ${DATA_ORDER} \
#--lr ${lr} &> logs/${expt}.log &

DATA_ORDER=qtype # or iid
expt=${CONFIG}_${DATA_ORDER}_${lr}

CUDA_VISIBLE_DEVICES=0 python -u vqa_experiments/vqa_trainer.py \
--config_name ${CONFIG} \
--expt_name ${expt} \
--stream_with_rehearsal \
--data_order ${DATA_ORDER} \
--lr ${lr}
122 changes: 122 additions & 0 deletions vqa_experiments/tdiuc/extract_image_features_tdiuc.py
@@ -0,0 +1,122 @@
import argparse, os
import h5py
import numpy as np
from scipy.misc import imread, imresize  # requires scipy < 1.2, where these still exist

import torch
import torchvision
import json

parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, default='/hdd/robik/TDIUC')
parser.add_argument('--max_images', default=None, type=int)

parser.add_argument('--image_height', default=224, type=int)
parser.add_argument('--image_width', default=224, type=int)

parser.add_argument('--model', default='resnet152')
parser.add_argument('--model_stage', default=4, type=int)
parser.add_argument('--batch_size', default=128, type=int)


def build_model(args):
    if not hasattr(torchvision.models, args.model):
        raise ValueError('Invalid model "%s"' % args.model)
    if 'resnet' not in args.model:
        raise ValueError('Feature extraction only supports ResNets')
    cnn = getattr(torchvision.models, args.model)(pretrained=True)
    # Keep the stem plus the first `model_stage` residual stages; drop pooling/fc.
    layers = [
        cnn.conv1,
        cnn.bn1,
        cnn.relu,
        cnn.maxpool,
    ]
    for i in range(args.model_stage):
        name = 'layer%d' % (i + 1)
        layers.append(getattr(cnn, name))
    model = torch.nn.Sequential(*layers)
    model.cuda()
    model.eval()
    return model


def run_batch(cur_batch, model):
    # Standard ImageNet normalization (the last std value should be 0.225).
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    image_batch = np.concatenate(cur_batch, 0).astype(np.float32)
    image_batch = (image_batch / 255.0 - mean) / std
    image_batch = torch.FloatTensor(image_batch).cuda()
    with torch.no_grad():
        feats = model(image_batch)
        feats = feats.data.cpu().clone().numpy()
    return feats


def path2iid(path):
    # COCO-style names like COCO_train2014_000000262148.jpg -> 262148
    return int(path.split('/')[-1].split('.')[0].split('_')[-1])


def main(args):
    args.output_h5_file = args.path + "/all_tdiuc_resnet.h5"
    p1 = f'{args.path}/Images/train2014'
    input_paths = [os.path.join(p1, a) for a in os.listdir(p1)]

    p1 = f'{args.path}/Images/val2014'
    input_paths.extend([os.path.join(p1, a) for a in os.listdir(p1)])

    model = build_model(args)
    img_size = (args.image_height, args.image_width)
    with h5py.File(args.output_h5_file, 'w') as f:
        feat_dset = None
        i0 = 0
        cur_batch = []
        iid = []
        for i, path in enumerate(input_paths):
            iid.append(path2iid(path))
            img = imread(path, mode='RGB')
            img = imresize(img, img_size, interp='bicubic')
            img = img.transpose(2, 0, 1)[None]
            cur_batch.append(img)
            if len(cur_batch) == args.batch_size:
                feats = run_batch(cur_batch, model)
                if feat_dset is None:
                    N = len(input_paths)
                    _, C, H, W = feats.shape
                    feat_dset = f.create_dataset('image_features', (N, H * W, C),
                                                 dtype=np.float32)
                    iid_dset = f.create_dataset('iids', (N,),
                                                dtype=np.int64)

                i1 = i0 + len(cur_batch)
                feats_r = feats.reshape(-1, 2048, 49)
                feat_dset[i0:i1] = np.transpose(feats_r, (0, 2, 1))
                i0 = i1
                print('Processed %d / %d images' % (i1, len(input_paths)))
                cur_batch = []

        if len(cur_batch) > 0:
            feats = run_batch(cur_batch, model)
            feats_r = feats.reshape(-1, 2048, 49)
            i1 = i0 + len(cur_batch)
            feat_dset[i0:i1] = np.transpose(feats_r, (0, 2, 1))
            print('Processed %d / %d images' % (i1, len(input_paths)))
        iid_dset[:len(iid)] = np.array(iid, dtype=np.int64)

    feat_file = h5py.File(args.output_h5_file, 'r')

    iid_list = feat_file['iids'][:]

    iid2idx = {str(iid): idx for idx, iid in enumerate(iid_list)}
    idx2iid = {idx: str(iid) for idx, iid in enumerate(iid_list)}

    lut = dict()
    lut['image_id_to_ix'] = iid2idx
    lut['image_ix_to_id'] = idx2iid

    json.dump(lut, open(f'{args.path}/map_tdiuc_resnet.json', 'w'))


if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
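
Downstream code can then fetch one image's features through the LUT this script writes (a sketch using only the files produced above; the image id is illustrative):
```
# Sketch: read back one feature map via map_tdiuc_resnet.json.
import h5py
import json

path = '/hdd/robik/TDIUC'                 # same --path as the extractor
lut = json.load(open(f'{path}/map_tdiuc_resnet.json'))

with h5py.File(f'{path}/all_tdiuc_resnet.h5', 'r') as f:
    ix = lut['image_id_to_ix']['262148']  # illustrative image id
    feats = f['image_features'][ix]       # (49, 2048): a 7x7 grid of vectors
print(feats.shape)
```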
