-
Notifications
You must be signed in to change notification settings - Fork 0
/
imdb_data_preparation.py
61 lines (50 loc) · 2.57 KB
/
imdb_data_preparation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -*- coding: utf-8 -*-
"""
Created Jun 8 2020
@author: Maksym Komarov
"""
# Preparation of the IMDB dataset. Classes are integer ages in the range [0, 99].
# Any number of classes, with equal intervals between them.
from tarfile import TarFile
import os
import shutil
import datetime as dt
from scipy.io import loadmat
#import sys
#sys.path.insert(1, '/content/gdrive/My Drive/Age_prediction') #path to dirtools.py
from dirtools import create_directory
from dirtools import get_agedir
from dirtools import get_classes
def _matlab_datenum_to_year(datenum):
    """Return the calendar year of a MATLAB serial date number, or None.

    MATLAB serial dates count days from 0000-01-00, while Python ordinals
    count from 0001-01-01 == 1, hence the fixed offset of 366 days.
    None is returned when the value cannot be mapped to a valid date
    (e.g. a placeholder/garbage date of birth in the metadata).
    """
    try:
        return dt.date.fromordinal(int(datenum) - 366).year
    except (ValueError, OverflowError):
        return None


def prepare_imdb_data(path_tar_data, path_tar_metadata, sorted_data_locate,
                      period=10, not_sorted_data_locate=None,
                      metadata_locate=None):
    """Unpack the IMDB archives and copy the images into per-age-class folders.

    Args:
        path_tar_data: path to the tar archive with the images (expected to
            unpack into an ``imdb_crop`` directory).
        path_tar_metadata: path to the tar archive with the metadata
            (expected to unpack into ``imdb/imdb.mat``).
        sorted_data_locate: destination directory for the sorted images;
            it is passed to ``create_directory`` together with the classes.
        period: forwarded to ``get_classes``; presumably the width of one
            age class in years - confirm against ``dirtools``.
        not_sorted_data_locate: where to unpack the images. When None, a
            scratch directory in the current working directory is used.
        metadata_locate: where to unpack the metadata. When None, a scratch
            directory in the current working directory is used.

    Only images whose apparent age (photo year minus birth year) lies in
    [0, 99] are copied. Records whose date of birth cannot be converted to a
    valid date are skipped. The default scratch directories are removed on
    exit, even if processing fails; caller-supplied locations are kept.

    Raises:
        Whatever ``tarfile``, ``loadmat`` or ``shutil.copy2`` raise for
        missing/corrupt archives or image files.
    """
    default_data_dir = 'imdb_data_not_sorted_workfile_will_be_deleted'
    default_meta_dir = 'imdb_metadata_workfile_will_be_deleted'

    classes = get_classes(period)
    if not_sorted_data_locate is None:
        not_sorted_data_locate = default_data_dir
    if metadata_locate is None:
        metadata_locate = default_meta_dir

    try:
        # TarFile.open (unlike the bare constructor) also reads compressed
        # archives transparently.
        # NOTE(review): extractall() trusts member paths inside the archive;
        # on Python >= 3.12 consider passing filter='data' if the archives
        # may come from an untrusted source.
        with TarFile.open(path_tar_data, "r") as archive:
            archive.extractall(not_sorted_data_locate)
        with TarFile.open(path_tar_metadata, "r") as archive:
            archive.extractall(metadata_locate)

        mat = loadmat(os.path.join(metadata_locate, 'imdb', 'imdb.mat'))
        record = mat['imdb'][0][0]
        birth_datenums = record[0][0]  # dob: MATLAB serial date numbers
        photo_years = record[1][0]     # year the photo was taken
        image_paths = record[2][0]     # e.g. '01/nm0000001_rm124825600_1899-5-10_1968.jpg'

        create_directory(sorted_data_locate, classes)
        source_dir = os.path.join(not_sorted_data_locate, 'imdb_crop')

        for datenum, photo_year, path_cell in zip(birth_datenums, photo_years,
                                                  image_paths):
            birth_year = _matlab_datenum_to_year(datenum)
            if birth_year is None:
                continue  # unusable date of birth
            # Plain ints avoid wrap-around in case the stored year is an
            # unsigned NumPy scalar.
            apparent_age = int(photo_year) - birth_year  # years old
            if 0 <= apparent_age <= 99:
                directory = get_agedir(apparent_age, classes)
                shutil.copy2(os.path.join(source_dir, str(path_cell[0])),
                             os.path.join(sorted_data_locate, directory))
    finally:
        # Only the default scratch directories are removed.
        for workdir in (default_data_dir, default_meta_dir):
            if os.path.isdir(workdir):
                shutil.rmtree(workdir)