-
Notifications
You must be signed in to change notification settings - Fork 0
/
save_pca_tsne_output.py
82 lines (64 loc) · 2.27 KB
/
save_pca_tsne_output.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
from urllib import request
import tarfile
import scipy.io
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import pandas as pd
# Root directory under which every downloaded dataset artifact is stored.
DATA_DIR = 'data'

# Directory the flower archive is extracted into.
FLOWER_DIR = os.path.join(DATA_DIR, 'flowers')

# Local path of the downloaded archive: the extraction directory name with a
# '.tgz' suffix, i.e. a sibling of FLOWER_DIR inside DATA_DIR.
FLOWER_TGZ = os.path.join(DATA_DIR, 'flowers.tgz')

# Local path of the MATLAB file holding the image labels.
LABELS_FILE = os.path.join(DATA_DIR, 'imagelabels.mat')
def create_dir_if_not_exists(_dir):
    """Create the directory ``_dir`` unless it is already present.

    Missing parent directories are created as well, and calling this for a
    directory that already exists is a no-op.

    Args:
        _dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``_dir`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # "if not exists: mkdir" and also handles nested paths.
    os.makedirs(_dir, exist_ok=True)
def _fetch_if_missing(url, dest):
    """Download ``url`` to ``dest`` unless ``dest`` already exists."""
    if os.path.exists(dest):
        return
    # Download under a temporary name first so that an interrupted transfer
    # never leaves a truncated file at the final path (which later runs would
    # mistake for a complete download).
    partial = dest + '.part'
    request.urlretrieve(url, filename=partial)
    os.replace(partial, dest)


def download_data_if_not_exists():
    """Make sure the flower images and the labels file are available locally.

    The labels file and the image archive are each downloaded only when the
    corresponding local file is missing. The archive is extracted into
    ``FLOWER_DIR`` only when that directory does not exist yet.

    Raises:
        urllib.error.URLError: If a download fails.
        tarfile.TarError: If the archive cannot be read or extracted.
    """
    base_url = 'https://www.robots.ox.ac.uk/~vgg/data/flowers/102/'

    # Both downloads are written below DATA_DIR, so it has to exist first.
    os.makedirs(DATA_DIR, exist_ok=True)

    # Checked independently of the image archive so that a missing labels file
    # is still fetched when the archive or the extracted images are present.
    _fetch_if_missing(base_url + 'imagelabels.mat', LABELS_FILE)

    if os.path.exists(FLOWER_DIR):
        return

    _fetch_if_missing(base_url + '102flowers.tgz', FLOWER_TGZ)

    # The archive is opened before the target directory is created, so an
    # unreadable archive does not leave an empty FLOWER_DIR behind. The
    # context manager closes the archive even if extraction raises.
    with tarfile.open(FLOWER_TGZ, "r:gz") as tar:
        create_dir_if_not_exists(FLOWER_DIR)
        if hasattr(tarfile, 'data_filter'):
            # The archive comes from the network: where the running Python
            # supports extraction filters, refuse members that would escape
            # the destination directory.
            tar.extractall(FLOWER_DIR, filter='data')
        else:
            tar.extractall(FLOWER_DIR)
def get_labels():
    """Read the image labels from the downloaded MATLAB file.

    Returns:
        The first row of the ``'labels'`` variable stored in ``LABELS_FILE``.
    """
    mat_contents = scipy.io.loadmat(LABELS_FILE)
    label_matrix = mat_contents['labels']
    return label_matrix[0]
def get_images(flower_dir=None):
    """List the paths of all files in the ``jpg`` folder of the flower data.

    Args:
        flower_dir: Directory containing the ``jpg`` sub-directory. Defaults
            to ``FLOWER_DIR`` when omitted.

    Returns:
        A list of paths (``<flower_dir>/jpg/<name>``) sorted by file name.

    Raises:
        FileNotFoundError: If the ``jpg`` sub-directory does not exist.
    """
    if flower_dir is None:
        flower_dir = FLOWER_DIR
    jpg_dir = os.path.join(flower_dir, 'jpg')
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible across runs and machines.
    # NOTE(review): presumably sorted file-name order is also the order of the
    # entries returned by get_labels() - confirm against the dataset docs.
    return [os.path.join(jpg_dir, name) for name in sorted(os.listdir(jpg_dir))]
def get_embeds():
    """Return the image embeddings, computing and caching them if needed.

    The embeddings are cached in ``embeds.npy`` in the current working
    directory. When that file exists it is loaded and returned. Otherwise the
    dataset is downloaded if necessary, every path from ``get_images()`` is
    embedded with ``keras_flower.embed_by_path``, and the result is saved to
    the cache file.

    Returns:
        numpy.ndarray: One entry per image, in the order of ``get_images()``.
    """
    _file = 'embeds.npy'
    if os.path.exists(_file):
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only safe as long as embeds.npy is produced locally by this
        # script. Kept so that previously written cache files still load.
        return np.load(file=_file, allow_pickle=True)

    # Imported lazily: the dependency is only needed when the embeddings
    # actually have to be computed, not when they are served from the cache.
    import keras_flower as kf

    download_data_if_not_exists()
    # Convert to an array up front so the first (computing) call returns the
    # same type as later calls that load the cache.
    embeds = np.asarray([kf.embed_by_path(_x) for _x in get_images()])
    np.save(_file, embeds, allow_pickle=True)
    return embeds
def get_pca():
    """Return the 3-component PCA projection of the image embeddings.

    The projection is cached in ``pca_embeds.npy`` in the current working
    directory; when that file exists it is loaded instead of recomputed.

    Returns:
        The cached array, or the freshly fitted and transformed embeddings.
    """
    cache_path = 'pca_embeds.npy'

    # Cache hit: nothing to compute.
    if os.path.exists(cache_path):
        return np.load(cache_path, allow_pickle=True)

    features = get_embeds()
    projected = PCA(n_components=3).fit_transform(features)
    np.save(cache_path, projected, allow_pickle=True)
    return projected
def get_tsne():
    """Return the 3-component t-SNE projection of the image embeddings.

    The projection is cached in ``tsne_embeds.npy`` in the current working
    directory; when that file exists it is loaded instead of recomputed.

    Returns:
        The cached array, or the freshly fitted and transformed embeddings.
    """
    cache_path = 'tsne_embeds.npy'

    # Cache hit: nothing to compute.
    if os.path.exists(cache_path):
        return np.load(cache_path, allow_pickle=True)

    features = get_embeds()
    projected = TSNE(n_components=3).fit_transform(features)
    np.save(cache_path, projected, allow_pickle=True)
    return projected
if __name__ == '__main__':
    # The data directory must exist before anything is downloaded into it.
    create_dir_if_not_exists(DATA_DIR)

    # Print the PCA projection as a table.
    pca_values = get_pca()
    pca_df = pd.DataFrame(data=pca_values)
    print(pca_df)

    # Print the t-SNE projection as a table.
    tsne_values = get_tsne()
    tsne_df = pd.DataFrame(data=tsne_values)
    print(tsne_df)