From f52722e4e38f2bac0fc479150a7467cb5d89eaf1 Mon Sep 17 00:00:00 2001 From: Mack Date: Mon, 26 Jun 2017 11:12:00 -0400 Subject: [PATCH] Fix issues with `WordOccurrenceAccumulator` on Windows. Fix #1441 (#1449) * #1441: Fix issues with `WordOccurrenceAccumulator` on Windows. * #1441: Use pre-scipy-0.17 version of `scipy.sparse.diags` function by passing explicit `offsets` parameter. --- gensim/topic_coherence/text_analysis.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gensim/topic_coherence/text_analysis.py b/gensim/topic_coherence/text_analysis.py index a44e57fb3e..1be0574d7b 100644 --- a/gensim/topic_coherence/text_analysis.py +++ b/gensim/topic_coherence/text_analysis.py @@ -221,7 +221,6 @@ def __init__(self, *args): self._co_occurrences = sps.lil_matrix((self._vocab_size, self._vocab_size), dtype='uint32') self._uniq_words = np.zeros((self._vocab_size + 1,), dtype=bool) # add 1 for none token - self._mask = self._uniq_words[:-1] # to exclude none token self._counter = Counter() def __str__(self): @@ -251,9 +250,10 @@ def partial_accumulate(self, texts, window_size): def analyze_text(self, window, doc_num=None): self._slide_window(window, doc_num) - if self._mask.any(): - self._occurrences[self._mask] += 1 - self._counter.update(itertools.combinations(np.nonzero(self._mask)[0], 2)) + mask = self._uniq_words[:-1] # to exclude none token + if mask.any(): + self._occurrences[mask] += 1 + self._counter.update(itertools.combinations(np.nonzero(mask)[0], 2)) def _slide_window(self, window, doc_num): if doc_num != self._current_doc_num: @@ -273,7 +273,8 @@ def _symmetrize(self): """ co_occ = self._co_occurrences co_occ.setdiag(self._occurrences) # diagonal should be equal to occurrence counts - self._co_occurrences = co_occ + co_occ.T - sps.diags(co_occ.diagonal(), dtype='uint32') + self._co_occurrences = \
        co_occ + co_occ.T - sps.diags(co_occ.diagonal(), offsets=0, dtype='uint32') def _get_occurrences(self, word_id): return 
self._occurrences[word_id]