-
Notifications
You must be signed in to change notification settings - Fork 57
/
coco_val_compact.py
36 lines (25 loc) · 1.03 KB
/
coco_val_compact.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import h5py
from tqdm import tqdm
import json
import pathlib
import argparse
if __name__ == '__main__':
    # Copy the per-image groups referenced by mscoco_resplit_val.json out of
    # val2014_obj36.h5 into a new, smaller file resplit_val_obj36.h5.
    parser = argparse.ArgumentParser()
    # NOTE: --batchsize is parsed but never read by this script; it is kept so
    # that existing command lines keep working.
    parser.add_argument('--batchsize', default=1, type=int, help='batch_size')
    parser.add_argument('--data_dir', type=str,
                        default='/ssd-playpen/home/jmincho/workspace/datasets/')
    args = parser.parse_args()

    data_dir = pathlib.Path(args.data_dir).resolve()
    coco_dir = data_dir.joinpath('COCO')

    with open(data_dir.joinpath('lxmert/mscoco_resplit_val.json')) as f:
        val_data = json.load(f)
    print(len(val_data))

    source_path = coco_dir.joinpath('features/val2014_obj36.h5')
    target_path = coco_dir.joinpath('features/resplit_val_obj36.h5')

    # Context managers guarantee both HDF5 files are closed (and the target
    # flushed) even if the copy loop raises part-way through.
    with h5py.File(source_path, 'r') as source_f, \
            h5py.File(target_path, 'w') as target_f:
        # The dataset names to copy are taken from the first entry's group.
        # Assumes every image group in the source has the same dataset
        # names -- TODO confirm.
        # An empty annotation list yields an empty target file instead of an
        # IndexError on val_data[0].
        if val_data:
            first_img_id = str(val_data[0]['img_id'])
            keys = list(source_f[first_img_id].keys())
        else:
            keys = []

        for datum in tqdm(val_data, ncols=50):
            img_id = str(datum['img_id'])
            grp = target_f.create_group(img_id)
            for k in keys:
                # [()] reads the whole source dataset into memory before it is
                # written into the target group under the same name.
                grp[k] = source_f[f'{img_id}/{k}'][()]