Skip to content

Commit

Permalink
Fix issues with WordOccurrenceAccumulator on Windows. Fix #1441 (#1449)
Browse files Browse the repository at this point in the history
* #1441: Fix issues with `WordOccurrenceAccumulator` on Windows.

* #1441: Use the pre-SciPy 0.17 form of the `scipy.sparse.diags` function by passing an explicit `offsets` parameter.
  • Loading branch information
macks22 authored and menshikh-iv committed Jun 26, 2017
1 parent dfd7da4 commit f52722e
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions gensim/topic_coherence/text_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ def __init__(self, *args):
self._co_occurrences = sps.lil_matrix((self._vocab_size, self._vocab_size), dtype='uint32')

self._uniq_words = np.zeros((self._vocab_size + 1,), dtype=bool) # add 1 for none token
self._mask = self._uniq_words[:-1] # to exclude none token
self._counter = Counter()

def __str__(self):
Expand Down Expand Up @@ -251,9 +250,10 @@ def partial_accumulate(self, texts, window_size):

def analyze_text(self, window, doc_num=None):
self._slide_window(window, doc_num)
if self._mask.any():
self._occurrences[self._mask] += 1
self._counter.update(itertools.combinations(np.nonzero(self._mask)[0], 2))
mask = self._uniq_words[:-1] # to exclude none token
if mask.any():
self._occurrences[mask] += 1
self._counter.update(itertools.combinations(np.nonzero(mask)[0], 2))

def _slide_window(self, window, doc_num):
if doc_num != self._current_doc_num:
Expand All @@ -273,7 +273,8 @@ def _symmetrize(self):
"""
co_occ = self._co_occurrences
co_occ.setdiag(self._occurrences) # diagonal should be equal to occurrence counts
self._co_occurrences = co_occ + co_occ.T - sps.diags(co_occ.diagonal(), dtype='uint32')
self._co_occurrences = \
co_occ + co_occ.T - sps.diags(co_occ.diagonal(), offsets=0, dtype='uint32')

def _get_occurrences(self, word_id):
return self._occurrences[word_id]
Expand Down

0 comments on commit f52722e

Please sign in to comment.