forked from Serenitas/topic-modeller
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
25 lines (20 loc) · 809 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import corpora_builder, ngram_adapter, coefficient_calculator
from time import time
# File and directory names used by this driver script.  Only `directory` and
# `out_lemmatized` feed the calls that are currently active; the remaining
# names are referenced solely by the disabled stages kept in comments below.
directory = "it_texts"
multiword_only_file = "multiword_only.txt"
out_file = "corpora.txt"
out_dictionary = "dict.txt"
out_lemmatized = "lemmed.txt"
out_ngrams = "ngrams.txt"
out_ngrams_by_doc = "doc_ngrams.txt"

# Active stage: hand the text directory to corpora_builder.build_corpora.
# Disabled timing/progress lines that used to precede it:
#   start = time()
#   print("Building corpora")
corpora_builder.build_corpora(directory)

# Disabled stages, left in place so they can be switched back on:
#   print("Adapting ngrams")
#   ngram_adapter.adapt_ngrams(out_ngrams, out_dictionary, 'result.txt')
#   print("Printing ngrams by doc")
#   corpora_builder.print_ngrams_by_doc(out_ngrams_by_doc, multiword_only_file, out_lemmatized)
#   print("Time:", int((time() - start) / 60), 'min', int((time() - start) % 60), 'sec')
#   coeffs = coefficient_calculator.calc_coeffs()

# Active stage: run the experiment on the lemmatized file.
# NOTE(review): the meaning of the two numeric arguments is defined in
# coefficient_calculator.experiment and is not visible here.  The original
# note "0, -2.6  0, -3" presumably lists argument pairs that were tried -
# confirm with the author.
coefficient_calculator.experiment(out_lemmatized, 0, -3)