-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPerplexityInterpolationStrategy.cpp
70 lines (54 loc) · 1.43 KB
/
PerplexityInterpolationStrategy.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
* PerplexityInterpolationStrategy.cpp
*
* Created on: Mar 31, 2017
* Author: louis
*/
#include "PerplexityInterpolationStrategy.h"
#include "LanguageModel.h"
#include <functional>
#include <algorithm>
#include <numeric>
#include <cmath>
namespace SLM {
PerplexityInterpolationStrategy::PerplexityInterpolationStrategy(SLM::LanguageModel& lm) : lm(&lm) {
// TODO Auto-generated constructor stub
}
/**
 * Destructor. Performs no work beyond the implicit destruction of the
 * members.
 */
PerplexityInterpolationStrategy::~PerplexityInterpolationStrategy() = default;
/**
 * Returns the value associated with `context`, computing and caching it on
 * first use.
 *
 * On a cache miss the counts returned by `lm->getCounts(context)` are turned
 * into a maximum-likelihood distribution p_i = count_i / sum(counts), its
 * Shannon entropy H = -sum_i p_i * log2(p_i) is computed in bits, and the
 * value 1 / 2^H (the reciprocal of the perplexity 2^H) is stored in
 * `weights` and returned. When there are no counts, or every count is zero,
 * H is 0 and the result is 1.0.
 *
 * @param context  Pattern used both as the cache key in `weights` and as the
 *                 argument to `lm->getCounts`.
 * @return         1 / 2^H for the count distribution of `context`.
 */
double PerplexityInterpolationStrategy::get(const Pattern& context)
{
    // Fast path: this context has been evaluated before.
    const auto cached = weights.find(context);
    if (cached != weights.end())
    {
        return cached->second;
    }

    // Only needed for the log line below, so it is not evaluated on a cache hit.
    const int contextSize = context.size();

    const std::vector<unsigned int> occurrenceCounts = lm->getCounts(context);

    // Accumulate in 64 bits: with an `int` seed and a 32-bit result the total
    // silently wraps once the counts add up to more than 2^32, which would
    // yield "probabilities" greater than 1 below.
    const unsigned long long sum =
        std::accumulate(occurrenceCounts.begin(), occurrenceCounts.end(), 0ULL);

    double entropySum = 0.0;
    if (sum > 0)
    {
        const double total = static_cast<double>(sum);
        for (const unsigned int count : occurrenceCounts)
        {
            // Zero counts are skipped: they contribute nothing to the entropy
            // and log2(0) is not finite.
            if (count > 0)
            {
                const double mle = static_cast<double>(count) / total;
                entropySum -= mle * std::log2(mle);
            }
        }
    }

    // Note: this is the reciprocal of the perplexity (2^H), even though the
    // log label below calls it "perplexity".
    const double inversePerplexity = 1.0 / std::exp2(entropySum);

    L_S << "Perplexity: get(" << contextSize << ") sum:" << sum << " entropysum:" << entropySum << " perplexity:" << inversePerplexity << "\n";

    weights[context] = inversePerplexity;
    return inversePerplexity;
}
/**
 * Identifier of this strategy.
 *
 * @return The constant string "perplexity".
 */
std::string PerplexityInterpolationStrategy::name() const
{
    return std::string("perplexity");
}
} /* namespace SLM */