-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdt-wordnet.py
executable file
·36 lines (27 loc) · 1.2 KB
/
dt-wordnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
import argparse
import sys
# Command-line interface.  Arguments are parsed at module level because the
# rest of the script reads `args` directly.
parser = argparse.ArgumentParser(
    description='Filter a tab-separated file of weighted word pairs, keeping '
                'only the pairs whose two (lower-cased) words both occur with '
                'part-of-speech tag "n" in the super-senses file and whose '
                'weight is above the threshold.',
)
parser.add_argument(
    '-w', '--wordnet-super-senses',
    type=argparse.FileType('r', encoding='UTF-8'),
    required=True,
    help='UTF-8 file with one entry per line; the text after the first tab '
         'is read as WORD.POS.ID and only entries whose POS is "n" are used',
)
# `dest='t'` keeps the parsed value available as `args.t`, exactly as before;
# `--threshold` is only an additional, self-explanatory spelling of `-t`.
parser.add_argument(
    '-t', '--threshold', dest='t',
    type=float,
    default=0.01,
    help='keep only pairs whose weight is strictly greater than this value '
         '(default: %(default)s)',
)
parser.add_argument(
    'dt',
    type=argparse.FileType('r', encoding='UTF-8'),
    help='UTF-8 input file with one WORD1<TAB>WORD2<TAB>WEIGHT record per line',
)
parser.add_argument(
    '-o', '--output',
    default=sys.stdout,
    type=argparse.FileType('w', encoding='UTF-8'),
    help='UTF-8 file the kept pairs are written to (default: standard output)',
)
args = parser.parse_args()
def read_super_senses_wordnet(f, operation=None):
    """Collect the words tagged ``n`` from a super-senses listing.

    Each line of *f* is cut at its first tab; the remainder is read as
    ``<word>.<pos>.<id>`` by splitting on its last two dots, so the word
    itself may contain dots.  Lines without a tab yield an empty tag and
    are therefore ignored.

    Args:
        f: Iterable of text lines (for example an open text file).
        operation: Optional callable applied to every kept word before it
            is stored; when ``None`` the word is stored unchanged.

    Returns:
        A set with the (optionally transformed) words whose ``<pos>`` part
        equals ``n``.
    """
    nouns = set()
    for record in f:
        # Everything after the first tab is the "<word>.<pos>.<id>" key.
        sense_key = record.partition('\t')[2]
        # Drop the trailing "<id>" (and line ending), then split off "<pos>".
        lemma, _, tag = sense_key.rpartition('.')[0].rpartition('.')
        if tag != 'n':
            continue
        nouns.add(lemma if operation is None else operation(lemma))
    return nouns
# Words tagged "n" in the super-senses file; a pair is kept only when both of
# its words are in this set.
# NOTE(review): the DT words below are lower-cased, but the lexicon entries are
# stored unchanged (no `operation` is passed), so lexicon entries containing
# upper-case letters will not match -- confirm whether `str.lower` should be
# passed to read_super_senses_wordnet here.
lexicon = read_super_senses_wordnet(args.wordnet_super_senses)

for line in args.dt:
    line = line.rstrip()
    # A blank line (e.g. a trailing empty line) carries no record; skip it
    # instead of failing on the three-way unpack below.
    if not line:
        continue
    # Each record is "word1<TAB>word2<TAB>weight".  A non-blank record with
    # fewer fields or a non-numeric weight still raises ValueError.
    word1, word2, weight = line.split('\t', 2)
    word1, word2, weight = word1.lower(), word2.lower(), float(weight)
    # The threshold comparison is strict: a weight equal to args.t is dropped.
    if word1 in lexicon and word2 in lexicon and weight > args.t:
        print('%s\t%s\t%f' % (word1, word2, weight), file=args.output)