This repository has been archived by the owner on Jan 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
index.js
55 lines (53 loc) · 2 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
const vocabulary = require('vocabulary-list-statistics');
const extract = require('extract-lemmatized-nonstop-words');
const { sum, max } = require('lodash');
// Lookup table built once at module load: maps each entry's `word` to the
// full vocabulary entry, so grade() can resolve a token without scanning
// the whole vocabulary list. If two entries share a word, the later one wins.
const wordsTable = new Map();
vocabulary.forEach((entry) => wordsTable.set(entry.word, entry));
// Upper vocabulary-rank bound of each level, ordered from easiest to hardest.
// Based on https://languagelearning.stackexchange.com/a/3065
const LEVEL_RANK_LIMITS = Object.freeze([
  ['A1', 600],
  ['A2', 1200],
  ['B1', 2500],
  ['B2', 5000],
  ['C1', 10000],
  ['C2', 20000],
]);

/**
 * Grade the vocabulary difficulty of a text.
 *
 * The text is tokenized with `extract`, each token is looked up in
 * `wordsTable` by its `vocabulary` property, and the rank of the matching
 * entry is recorded. Tokens without a match are recorded with rank 0.
 *
 * NOTE(review): because unmatched tokens get rank 0 they fall under every
 * level bound (0 <= 600), so they count toward A1 and lower the mean.
 * Presumably deliberate (proper nouns, numbers) - confirm before changing.
 *
 * @param {String} text - Text to analyse.
 * @returns {{meta: Object, words: Array}} Result object:
 *   - `meta.words`:  number of tokens extracted from the text.
 *   - `meta.mean`:   arithmetic mean of the token ranks (0 when no tokens).
 *   - `meta.max`:    highest token rank (0 when no tokens).
 *   - `meta.levels`: for each level A1..C2, the percentage of tokens whose
 *                    rank is within that level's bound (0 when no tokens).
 *   - `meta.grade`:  first level, easiest first, covering at least 90% of
 *                    the tokens; 'C2' when no level reaches 90%.
 *   - `words`:       `[word, count]` pairs for matched tokens, most frequent
 *                    first; equal counts keep first-seen order.
 */
function grade(text) {
  const tokens = extract(text, []);
  const total = tokens.length;
  const ranks = [];
  const counts = new Map();

  tokens.forEach((token) => {
    const entry = wordsTable.get(token.vocabulary);
    if (!entry) {
      ranks.push(0);
      return;
    }
    ranks.push(entry.rank);
    counts.set(entry.word, (counts.get(entry.word) || 0) + 1);
  });

  // Guard every division by the token count: an empty token list would
  // otherwise yield NaN percentages and a NaN mean.
  const levels = {};
  LEVEL_RANK_LIMITS.forEach(([level, limit]) => {
    const within = ranks.filter((rank) => rank <= limit).length;
    levels[level] = total === 0 ? 0 : within * 100 / total;
  });

  const reached = LEVEL_RANK_LIMITS.find(([level]) => levels[level] >= 90);

  return {
    meta: {
      words: total,
      mean: total === 0 ? 0 : sum(ranks) / total,
      max: total === 0 ? 0 : max(ranks),
      levels,
      grade: reached ? reached[0] : 'C2',
    },
    // Numeric comparator returns 0 for equal counts, so the (stable) sort
    // gives a deterministic order for ties.
    words: [...counts].sort((a, b) => b[1] - a[1]),
  };
}
// Public API: the module's export is the `grade` function itself.
module.exports = grade;