-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop15000.js
57 lines (50 loc) · 1.58 KB
/
top15000.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
'use strict';
const Promise = require('bluebird');
const debug = require('debug')('top15000');
const _ = require('lodash');
const fs = require('fs');
const Storage = require('./storage');
/**
 * Name of the storage collection that this module reads from and writes to;
 * passed as the first argument to every Storage call in this file.
 * @type {string}
 */
const collectionName = 'top15000';
/**
 * Path to the vocabulary file that populate() reads.
 * The path is relative, so Node's fs resolves it against the current working
 * directory of the process rather than against this module's directory.
 * @type {string}
 */
const filePath = './data/top15000.txt';
/**
 * Looks up stored vocabulary entries by delegating to Storage.find.
 *
 * The second and third Storage.find arguments are always null; `count` and
 * `offset` are forwarded unchanged as the fourth and fifth arguments.
 * NOTE(review): presumably these act as a page size and a number of entries
 * to skip — confirm against ./storage.
 *
 * @param {*} count - forwarded to Storage.find as its fourth argument
 * @param {*} offset - forwarded to Storage.find as its fifth argument
 * @returns {*} whatever Storage.find returns (presumably a Promise — confirm)
 */
const fetchTerms = (count, offset) =>
  Storage.find(collectionName, null, null, count, offset);
/**
 * Parses one vocabulary line into a word record, or null when the line is skipped.
 *
 * A line looks like `domain сфера (интересов)`: everything before the first
 * whitespace character is the term, and the rest of the line (if any) is the
 * translation.
 *
 * @param {string} line - one raw line of the vocabulary file
 * @returns {?{term: string, translation: (string|undefined)}} the parsed word,
 *   or null when the term is shorter than three characters
 */
const parseVocabularyLine = function(line) {
  // trim() also drops the "\r" left over from CRLF line endings.
  const entry = line.trim();
  // Split at the first whitespace of any kind, so tab-separated or
  // multi-space lines still yield a clean term and translation.
  const boundary = entry.search(/\s/);
  const term = boundary === -1 ? entry : entry.slice(0, boundary);

  // Include all words except short ones; this also discards blank lines.
  if (term.length < 3) {
    return null;
  }

  const word = {term: term};
  const translation = boundary === -1 ? '' : entry.slice(boundary).trim();
  if (translation) {
    word.translation = translation;
  }
  return word;
};

/**
 * Writes words from the vocabulary file into the database.
 *
 * Reads the file at `filePath`, parses every line into a word, shuffles the
 * words, moves the ones that have a translation to the front, then calls
 * Storage.remove for the collection followed by Storage.insert with the words.
 * NOTE(review): Storage.remove presumably empties the collection — confirm
 * against ./storage.
 *
 * The file is read synchronously, so a missing or unreadable file makes this
 * function throw instead of rejecting the returned promise.
 *
 * @returns {Promise} result of Storage.insert, chained after Storage.remove
 */
const populate = function() {
  const data = fs.readFileSync(filePath, {encoding: 'utf-8'});
  const words = data.split('\n').map(function(line) {
    return parseVocabularyLine(line);
  }).filter(Boolean);

  // Shuffle words but make sure that words with a translation bubble to the top.
  // The two groups are split with order-preserving filters instead of being
  // sorted by translation text, which would put the translated words back
  // into alphabetical order and cancel the shuffle.
  const shuffled = _.shuffle(words);
  const translated = shuffled.filter(function(word) {
    return word.translation !== undefined;
  });
  const untranslated = shuffled.filter(function(word) {
    return word.translation === undefined;
  });
  const ordered = translated.concat(untranslated);

  return Storage.remove(collectionName)
    .then(function() {
      return Storage.insert(collectionName, ordered);
    });
};
// Public API of this module: database seeding and term lookup.
module.exports = {populate, fetchTerms};