-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSLM-ppl.cpp
81 lines (65 loc) · 2.25 KB
/
SLM-ppl.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
//============================================================================
// Name : SLM-ppl.cpp
// Author :
// Version :
// Copyright : Your copyright notice
// Description : Runs the SLM language model over the test input files, scoring each word given its three preceding words
//============================================================================
#include <stdio.h>
#include <stdlib.h>

#include <cstddef>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

#include "Logging.h"
#include "ProgramOptions.h"
#include "BackoffStrategies.h"
#include "LanguageModel.h"
#include "Utils.h"
#include "Timer.h"
int main(int argc, char** argv) {
SLM::ProgramOptions po(argc, argv);
SLM::LanguageModel lm(po);
SLM::BackoffStrategies bo(po, lm);
SLM::ProgressTimer pt;
for(std::string inputFile : po.getTestInputFiles())
{
L_V << "SLM: Reading " << inputFile << "\n";
std::ifstream file(inputFile);
bo.nextFile();
// pt.nextFile();
std::string retrievedString;
while(std::getline(file, retrievedString))
{
if(po.addSentenceMarkers())
{
retrievedString = "<s> <s> " + retrievedString;
}
bo.nextLine();
// pt.nextLine();
std::vector<std::string> words = whitespaceTokeniser(retrievedString);
for(int i = (4-1); i < words.size(); ++i)
{
std::stringstream contextStream;
contextStream << words[i-(4-1)];
for(int ii = 1; ii < 4 - 1; ++ii)
{
contextStream << " " << words[i-(4-1)+ii];
}
try
{
Pattern context = lm.toPattern(contextStream.str());
Pattern focus = lm.toPattern(words[i]);
L_P << "SLM: [" << lm.toString(context) << "] " << lm.toString(focus) << "\n";
bo.prob(context, focus, lm.isOOV(focus));
// pt.nextPattern();
// pt.toString();
} catch (const UnknownTokenError &e)
{
std::cerr << "Unknown token error: " << contextStream.str() << " " << words[i] << std::endl;
continue;
}
}
}
}
std::cout << "DONE" << std::endl;
bo.done();
return EXIT_SUCCESS;
}