-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
73 lines (60 loc) · 1.89 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from nltk import Tree
# Part-of-speech labels treated as nominal (noun tags plus the pronoun tag).
nominal_labels = ["NN", "NNS", "NNP", "NNPS", "PRP"]

# Non-reflexive pronouns, each listed capitalized first and then lower-case.
# NOTE(review): "them" has an entry in pronoun_numbers but is not listed
# here -- confirm whether that omission is intentional.
pronouns = [
    form
    for base in ("he", "him", "she", "her", "it", "they")
    for form in (base.capitalize(), base)
]

# Reflexive pronouns, each listed capitalized first and then lower-case.
reflexive_pronouns = [
    form
    for base in ("himself", "herself", "itself", "themselves")
    for form in (base.capitalize(), base)
]

# Grammatical number keyed by POS label or lower-case pronoun.
# "PRP" maps to None: the tag alone does not determine the number.
pronoun_numbers = {
    **dict.fromkeys(
        [
            "NN", "NNP",
            "he", "she", "him", "her", "it",
            "himself", "herself", "itself",
        ],
        "singular",
    ),
    **dict.fromkeys(
        ["NNS", "NNPS", "they", "them", "themselves"],
        "plural",
    ),
    "PRP": None,
}

# Lower-case pronouns grouped by grammatical gender.
male_p = ["he", "him", "himself"]
female_p = ["she", "her", "herself"]
neuter_p = ["it", "itself"]
def read_from_file(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line, exactly as produced by
        ``readlines`` (trailing newlines are kept).

    Raises:
        SystemExit: With exit status -1 when ``file_name`` is empty or None.
        OSError: Propagated from ``open`` when the file cannot be opened.
    """
    # Local import keeps this function self-contained. sys.exit is used
    # instead of the bare exit() helper because the latter is injected by
    # the site module and is not guaranteed to exist (e.g. ``python -S``).
    import sys

    if not file_name:
        # Diagnostics go to stderr so they do not mix with regular output.
        print("Error trying to read from file", file=sys.stderr)
        sys.exit(-1)

    with open(file_name) as f:
        return f.readlines()
def get_trees(file_name):
    """Parse every non-blank line of a file into a tree.

    Args:
        file_name: Path of a file holding one bracketed tree per line.

    Returns:
        A list of ``Tree`` objects, one per non-blank line, in file order.
    """
    # Whitespace-only lines (e.g. a trailing empty line) are skipped:
    # Tree.fromstring raises ValueError on a string with no brackets.
    return [
        Tree.fromstring(line)
        for line in read_from_file(file_name)
        if line.strip()
    ]
def get_pos(tree, node):
    """Return the first position in ``tree`` whose subtree equals ``node``.

    Positions are visited in the order given by ``tree.treepositions()``
    and compared with ``==``. Returns None when nothing matches.
    """
    candidates = (
        position
        for position in tree.treepositions()
        if tree[position] == node
    )
    return next(candidates, None)
def get_pronoun(tree, pos):
    """Return the first leaf of the subtree at ``pos``, lower-cased."""
    subtree = tree[pos]
    first_leaf = subtree.leaves()[0]
    return first_leaf.lower()
def get_dom_np(sents, pos):
    """Return the last tree in ``sents`` and the parent position of ``pos``.

    The parent position is ``pos`` with its final element dropped; for a
    pronoun position this is the position of the NP directly above it.
    """
    return sents[-1], pos[:-1]
def walk_up_to(tree, pos, targets):
    """Climb from ``pos`` towards the root until a target label is found.

    The starting position itself is never tested; the search begins with
    its parent.

    Args:
        tree: Tree indexable by position whose nodes expose ``label()``.
        pos: Position to start from.
        targets: Collection of labels that end the climb.

    Returns:
        A ``(path, pos)`` pair: ``path`` lists every position visited,
        starting with the original ``pos`` and ending with the matching
        ancestor; ``pos`` is that ancestor's position.

    Raises:
        ValueError: If the root is reached without meeting a target label.
            (Previously this case looped forever, because slicing the
            empty root position yields the root position again.)
    """
    start = pos
    path = [pos]
    # An empty position is the root: there is nothing left to climb to.
    while pos:
        # Climb one level by dropping the last element of the position.
        pos = pos[:-1]
        path.append(pos)
        if tree[pos].label() in targets:
            return path, pos
    raise ValueError(
        "no ancestor of position {!r} has a label in {!r}".format(start, targets)
    )