-
Notifications
You must be signed in to change notification settings - Fork 2
/
StanfordNLP.py
62 lines (50 loc) · 1.98 KB
/
StanfordNLP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
'''
Sample usage of the Python package stanfordcorenlp for accessing a Stanford CoreNLP server.
Written as part of the blog post: https://www.khalidalnajjar.com/how-to-setup-and-use-stanford-corenlp-server-with-python/
'''
import json
import logging
from collections import defaultdict

from stanfordcorenlp import StanfordCoreNLP
class StanfordNLP:
    '''Thin convenience wrapper around a ``StanfordCoreNLP`` client.

    Every instance method forwards its argument to the corresponding method
    of the client held in ``self.nlp`` and returns that method's result.
    ``annotate`` additionally decodes the client's JSON response.
    '''

    def __init__(self, host='http://localhost', port=9000):
        '''Create the underlying client and the properties used by ``annotate``.

        Args:
            host: Passed positionally to ``StanfordCoreNLP``.
            port: Passed to ``StanfordCoreNLP`` as ``port``.
        '''
        # The timeout value is forwarded as-is (presumably milliseconds --
        # confirm against the stanfordcorenlp documentation).
        # Original author's hint for verbose client output: additionally pass
        # quiet=False, logging_level=logging.DEBUG
        self.nlp = StanfordCoreNLP(host, port=port, timeout=30000)

        # Properties sent with every ``annotate`` request.
        self.props = {
            'annotators': 'tokenize,ssplit',
            'pipelineLanguage': 'en',
            'outputFormat': 'json',
        }

    def word_tokenize(self, sentence):
        '''Return the client's ``word_tokenize`` result for *sentence*.'''
        return self.nlp.word_tokenize(sentence)

    def sent_tokenize(self, para):
        '''Return the client's ``sent_tokenize`` result for *para*.'''
        return self.nlp.sent_tokenize(para)

    def pos(self, sentence):
        '''Return the client's ``pos_tag`` result for *sentence*.'''
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        '''Return the client's ``ner`` result for *sentence*.'''
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        '''Return the client's ``parse`` result for *sentence*.'''
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        '''Return the client's ``dependency_parse`` result for *sentence*.'''
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        '''Annotate *sentence* using ``self.props`` and decode the JSON reply.

        Returns:
            The object produced by ``json.loads`` on the client's response.

        Raises:
            ValueError: Propagated from ``json.loads`` when the response is
                not valid JSON (``json.JSONDecodeError`` is a subclass).
        '''
        raw = self.nlp.annotate(sentence, properties=self.props)
        return json.loads(raw)

    @staticmethod
    def tokens_to_dict(_tokens):
        '''Index token records by their integer ``index`` field.

        Args:
            _tokens: Iterable of mappings, each providing the keys
                ``index``, ``word``, ``lemma``, ``pos`` and ``ner``.

        Returns:
            A ``defaultdict(dict)`` mapping ``int(token['index'])`` to a dict
            holding that token's ``word``, ``lemma``, ``pos`` and ``ner``.
            Other keys of the input records are dropped.

        Raises:
            KeyError: If a token lacks one of the required keys.
                NOTE(review): ``self.props`` only requests
                ``tokenize,ssplit``; tokens produced by ``annotate`` with
                those defaults may not carry lemma/pos/ner -- confirm.
        '''
        # ``defaultdict`` comes from the module-level ``collections`` import.
        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner'],
            }
        return tokens
if __name__ == '__main__':
    # Demo: run every wrapper method once on a sample text and print the
    # result. Requires a CoreNLP server reachable at the default host/port.
    sNLP = StanfordNLP()
    text = 'A blog post using Stanford CoreNLP Server. Visit www.khalidalnajjar.com for more details.'

    # (label, bound method) pairs, listed in the order they are executed.
    demos = (
        ("Annotate:", sNLP.annotate),
        ("POS:", sNLP.pos),
        ("Tokens:", sNLP.word_tokenize),
        ("NER:", sNLP.ner),
        ("Parse:", sNLP.parse),
        ("Dep Parse:", sNLP.dependency_parse),
    )
    for label, run in demos:
        print(label, run(text))