Skip to content

Commit

Permalink
reduce phraser memory
Browse files Browse the repository at this point in the history
  • Loading branch information
jenishah committed Oct 26, 2018
1 parent 242c80e commit bba2e46
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
7 changes: 5 additions & 2 deletions gensim/models/phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ def __init__(self, phrases_model):
for bigram, score in phrases_model.export_phrases(corpus, self.delimiter, as_tuples=True):
if bigram in self.phrasegrams:
logger.info('Phraser repeat %s', bigram)
- self.phrasegrams[bigram] = (None, score)
+ self.phrasegrams[bigram] = score
count += 1
if not count % 50000:
logger.info('Phraser added %i phrasegrams', count)
Expand Down Expand Up @@ -848,7 +848,10 @@ def score_item(self, worda, wordb, components, scorer):
"""
try:
- return self.phrasegrams[tuple(components)][-1]
+ if list(self.phrasegrams.values())[0].__class__ is tuple:
+ return self.phrasegrams[tuple(components)][-1]
+ else:
+ return self.phrasegrams[tuple(components)]
except KeyError:
return -1

Expand Down
Binary file modified gensim/test/test_data/phraser_model_3dot6
Binary file not shown.
1 change: 0 additions & 1 deletion gensim/test/test_phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import unittest

import six

import numpy as np

from gensim.utils import to_unicode
Expand Down

0 comments on commit bba2e46

Please sign in to comment.