-
Notifications
You must be signed in to change notification settings - Fork 22
/
stats.py
33 lines (25 loc) · 900 Bytes
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
__author__='thiagocastroferreira'
"""
Author: Thiago Castro Ferreira
Date: 15/07/2018
Description:
Script to provide statistics about the dataset and the extracted information
PYTHON VERSION: 2.7
"""
def run(entryset):
    """Print the number of sets and collect flat statistics lists.

    Walks every entry of ``entryset`` and every lexicalization in the
    entry's ``lexEntries``, accumulating templates, references and the
    entities those references point to.

    Args:
        entryset: sized iterable of entries. Each entry must expose
            ``lexEntries`` (a sized iterable); each of its items must
            expose ``template``, ``template_de`` and ``references``, and
            each reference must expose ``entity``.

    Returns:
        A tuple ``(lexsize, templates, templates_de, entities, references)``:
            lexsize: total number of items over all ``lexEntries``.
            templates: every ``lex.template``, in traversal order.
            templates_de: every ``lex.template_de`` that is truthy.
            entities: ``reference.entity`` for every collected reference.
            references: every item of every ``lex.references``.
    """
    print(10 * '*')
    # Build a single string: on Python 2.7 (no print_function import),
    # ``print(a, b)`` prints the repr of the tuple ``(a, b)`` instead of
    # the two values.
    print('Number of Sets: %d' % len(entryset))

    lexsize = 0
    templates, templates_de = [], []
    references, entities = [], []

    for entry in entryset:
        lexsize += len(entry.lexEntries)
        for lex in entry.lexEntries:
            templates.append(lex.template)
            # The German template is optional; skip empty/None values.
            if lex.template_de:
                templates_de.append(lex.template_de)
            references.extend(lex.references)
            entities.extend(reference.entity for reference in lex.references)

    return lexsize, templates, templates_de, entities, references