# iNaturalist_stats.py
# Forked from noameshed/novelty-detection.
# explore data statistics
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
# Root of the dataset. The loops below walk it as:
#   <f>/<biological group>/<class>/<entries counted as images>
f = 'D:/noam_/Cornell/CS7999/iNaturalist/train_val_images/'

grp_names = []      # biological group folder names, in os.listdir order
grp_count = []      # total entries per group, parallel to grp_names
grp_min = np.inf    # size of the smallest class folder seen so far
min_folder = ''     # name of that smallest class folder
grp_max = 0         # size of the largest class folder seen so far
max_folder = ''     # name of that largest class folder
avg_folder = 0      # running total of entries; becomes the mean after the loop
counter = 0         # number of class folders visited

for bio_grp in os.listdir(f):
    class_path = os.path.join(f, bio_grp)
    # Skip stray files (e.g. OS metadata files); os.listdir would raise
    # NotADirectoryError on them.
    if not os.path.isdir(class_path):
        continue

    grp_total = 0
    for clss in os.listdir(class_path):
        clss_path = os.path.join(class_path, clss)
        if not os.path.isdir(clss_path):
            continue

        # Every directory entry is counted.
        # NOTE(review): assumes class folders contain only image files.
        pics = len(os.listdir(clss_path))
        if pics > grp_max:
            grp_max = pics
            max_folder = clss
        if pics < grp_min:
            grp_min = pics
            min_folder = clss
        avg_folder += pics
        grp_total += pics
        counter += 1

    grp_names.append(bio_grp)
    grp_count.append(grp_total)

# Without any class folder the mean is undefined and grp_min is still inf
# (which '%d' cannot format), so stop with a clear message instead.
if counter == 0:
    raise SystemExit('no class folders found under %s' % f)

avg_folder /= counter

print('smallest folder (%s) has %d images' % (min_folder, grp_min))
print('biggest folder (%s) has %d images' % (max_folder, grp_max))
print('average folder size is %d' % (round(avg_folder)))
'''
Results printed:
smallest folder (Datana ministra) has 14 images
biggest folder (Danaus plexippus) has 3949 images
average folder size is 133
'''
# Plot the total number of images in each biological group as a bar chart.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (it raises TypeError), and the keyword form works on older
# releases as well.
ax = sns.barplot(x=grp_names, y=grp_count)
ax.set_title('Distribution of Images by Biological Group')
ax.set_xlabel('Biological Group')
ax.set_ylabel('Number of images')

# Write each bar's height as a label, horizontally centered on the bar and
# offset 4 points above its top edge.
for p in ax.patches:
    ax.annotate(
        "%d" % p.get_height(),
        (p.get_x() + p.get_width() / 2., p.get_height()),
        ha='center', va='center', fontsize=8, color='black',
        xytext=(0, 4), textcoords='offset points',
    )

plt.show()