-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathnatasha.py
54 lines (40 loc) · 1.3 KB
/
natasha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from dependency_paraphraser.pretrained_projectors import natasha_projector
from natasha import (
Segmenter,
MorphVocab,
NewsEmbedding,
NewsMorphTagger,
NewsSyntaxParser,
Doc,
)
from dependency_paraphraser import projection, synonyms
# Shared natasha pipeline components, created once at import time and reused
# by every call to paraphrase().
segmenter = Segmenter()
morph_vocab = MorphVocab()
# A single embedding instance is passed to both the morphology tagger and the
# syntax parser below.
emb = NewsEmbedding()
morph_tagger = NewsMorphTagger(emb)
syntax_parser = NewsSyntaxParser(emb)
# Default word vectors for synonym replacement; stays None until
# use_news_embeddings() assigns it. paraphrase() reads it when w2v is None.
gensim_emb = None
def use_news_embeddings():
    """Make the news embeddings the default vectors for synonym replacement.

    Takes the gensim-format view of the module-level ``emb`` (``emb.as_gensim``)
    and stores it in the module-level ``gensim_emb``, which ``paraphrase``
    falls back to when it is called without an explicit ``w2v``.
    """
    global gensim_emb
    converted = emb.as_gensim
    gensim_emb = converted
def paraphrase(text, tree_temperature=0.5, w2v=None, min_sim=0.5, p_rep=0.5, projector=natasha_projector):
    """Paraphrase ``text`` sentence by sentence and return the result as one string.

    The text is segmented, morphologically tagged and syntactically parsed with
    the module-level natasha components. Each sentence is then passed through
    ``projection.make_tree_projection`` and, when word vectors are available,
    through ``synonyms.replace_synonyms``.

    Args:
        text: Input text to paraphrase.
        tree_temperature: Forwarded to ``make_tree_projection`` as ``temperature``.
        w2v: Word vectors for synonym replacement. When ``None``, the
            module-level ``gensim_emb`` is used. If the resulting value is
            falsy, synonym replacement is skipped and the projected tokens'
            ``text`` is used as is.
        min_sim: Forwarded to ``replace_synonyms``.
        p_rep: Forwarded to ``replace_synonyms``.
        projector: Forwarded to ``make_tree_projection`` as ``model``.

    Returns:
        The processed sentences, each with its words joined by single spaces,
        themselves joined by single spaces.
    """
    parsed = Doc(text)
    parsed.segment(segmenter)
    parsed.tag_morph(morph_tagger)
    parsed.parse_syntax(syntax_parser)

    # Fall back to the module-level default only when the caller passed nothing.
    vectors = gensim_emb if w2v is None else w2v

    def render(sentence):
        # Produce the paraphrased surface form of a single sentence.
        projected = projection.make_tree_projection(
            sentence, model=projector, temperature=tree_temperature,
        )
        if not vectors:
            return ' '.join([tok.text for tok in projected])
        replaced = synonyms.replace_synonyms(
            projected, w2v=vectors, morph_vocab=morph_vocab, min_sim=min_sim, p_rep=p_rep,
        )
        return ' '.join(replaced)

    return ' '.join([render(sentence) for sentence in parsed.sents])