-
Notifications
You must be signed in to change notification settings - Fork 1
/
CostCalc.cpp
82 lines (70 loc) · 2.32 KB
/
CostCalc.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include "CostCalc.h"
// Compute the weighted Gini score for one side of a split.
//
// data      - records on this side of the split; each record's `label` is
//             matched against the entries of Data::LABEL. Records whose label
//             matches no entry are not counted.
// instances - divisor used to weight this side's impurity (presumably the
//             total number of records across both sides -- confirm against
//             the caller). A value of 0 is not guarded against here.
//
// Returns (1 - sum of squared label proportions) * (counted size / instances),
// or 0.0 when nothing was counted. The "counted size" is taken AFTER the label
// boost below, so it can exceed data->size().
double CostCalc::Gini::getGiniScore(DataSet *data, double instances)
{
    // Index in Data::LABEL of the label that is deliberately over-weighted
    // (label "B" according to the original note) and the factor applied to it.
    const int boostedLabelIndex = 2;
    const int boostFactor = 5;

    const int labelCount = static_cast<int>(Data::LABEL.size());
    const int recordCount = static_cast<int>(data->size());

    // Tally how many records carry each known label.
    vector<int> cnt(Data::LABEL.size(), 0);
    for (int index = 0; index < recordCount; ++index)
    {
        for (int label = 0; label < labelCount; ++label)
        {
            if (data->at(index)->label == Data::LABEL[label])
            {
                ++cnt[label];
                break; // a record has exactly one label; stop at the first match
            }
        }
    }

    // Buff label B. Guarded so a label table with fewer than three entries
    // does not index past the end of the tally vector.
    if (boostedLabelIndex < labelCount)
        cnt[boostedLabelIndex] *= boostFactor;

    // Total number of (boosted) counted records.
    int total = 0;
    for (int label = 0; label < labelCount; ++label)
        total += cnt[label];

    // Nothing counted: avoid dividing by zero and report no impurity.
    if (total == 0)
        return 0.0;

    double sumOfSquares = 0.0;
    for (int label = 0; label < labelCount; ++label)
    {
        const double proportion = static_cast<double>(cnt[label]) / total;
        sumOfSquares += proportion * proportion;
    }

    return (1.0 - sumOfSquares) * (static_cast<double>(total) / instances);
}
// Gini index of a two-way split: the sum of the weighted Gini scores of
// group->first and group->second, each computed with the same `instances`
// divisor.
double CostCalc::Gini::getGiniIndex(GroupDataSet *group, double instances)
{
    const double firstScore = CostCalc::Gini::getGiniScore(group->first, instances);
    const double secondScore = CostCalc::Gini::getGiniScore(group->second, instances);
    return firstScore + secondScore;
}
// Compute the entropy score (in bits, i.e. using log base 2) for one side of
// a split.
//
// data - records on this side of the split; each record's `label` is matched
//        against the entries of Data::LABEL. Records whose label matches no
//        entry are not counted.
//
// Returns -sum(p * log2(p)) over the label proportions p, where proportions
// are taken AFTER the label boost below. Returns 0.0 when nothing was counted
// (for example, an empty data set) instead of propagating NaN from 0/0.
double CostCalc::Entropy::getEntropyScore(DataSet *data)
{
    // Index in Data::LABEL of the label that is deliberately over-weighted
    // (label "B" according to the original note) and the factor applied to it.
    const int boostedLabelIndex = 2;
    const int boostFactor = 5;

    const int labelCount = static_cast<int>(Data::LABEL.size());
    const int recordCount = static_cast<int>(data->size());

    // Tally how many records carry each known label.
    vector<int> count(Data::LABEL.size(), 0);
    for (int i = 0; i < recordCount; ++i)
    {
        const char label = data->at(i)->label;
        for (int j = 0; j < labelCount; ++j)
        {
            if (label == Data::LABEL.at(j))
            {
                ++count[j];
                break; // a record has exactly one label; stop at the first match
            }
        }
    }

    // Buff label B. Guarded so a label table with fewer than three entries
    // does not index past the end of the tally vector.
    if (boostedLabelIndex < labelCount)
        count[boostedLabelIndex] *= boostFactor;

    // Total number of (boosted) counted records.
    int total = 0;
    for (int j = 0; j < labelCount; ++j)
        total += count[j];

    // Nothing counted: every proportion would be 0/0 (NaN), so report zero
    // entropy explicitly.
    if (total == 0)
        return 0.0;

    double entropy = 0.0;
    for (int j = 0; j < labelCount; ++j)
    {
        const double proportion = count[j] * 1.0 / total;
        // Skip absent labels: log2(0) is -infinity and 0 * -infinity is NaN.
        if (proportion != 0)
            entropy -= proportion * log2(proportion);
    }
    return entropy;
}
// Entropy index of a two-way split: each side's entropy score multiplied by
// that side's record count (size()) and divided by `instances`, then summed.
// `instances` is presumably the total number of records across both sides --
// confirm against the caller; a value of 0 is not guarded against here.
double CostCalc::Entropy::getEntropyIndex(GroupDataSet *group, double instances)
{
    const double firstTerm =
        group->first->size() * CostCalc::Entropy::getEntropyScore(group->first) / instances;
    const double secondTerm =
        group->second->size() * CostCalc::Entropy::getEntropyScore(group->second) / instances;
    return firstTerm + secondTerm;
}