Commit

Make negative ns_exponent work correctly (piskvorky#3250)
* add tests with negative ns_exponent

* fix flake8

* explicitly cast ns_exponent to FLOAT

* Apply suggestions from code review

* dynamic cast

* Update CHANGELOG.md

* Update CHANGELOG.md

Co-authored-by: Michael Penkov <m@penkov.dev>
menshikh-iv and mpenkov authored Oct 28, 2021
1 parent e51288c commit 6e36266
Showing 5 changed files with 30 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@ Changes
* [#3194](https://github.com/RaRe-Technologies/gensim/pull/3194): Added random_seed parameter to make LsiModel reproducible, by [@parashardhapola](https://github.com/parashardhapola)
* [#3251](https://github.com/RaRe-Technologies/gensim/pull/3251): Apply new convention of delimiting instance params in str function, by [@menshikh-iv](https://github.com/menshikh-iv)
* [#3227](https://github.com/RaRe-Technologies/gensim/pull/3227): Fix FastText doc-comment example for `build_vocab` and `train` to use correct argument names, by [@HLasse](https://github.com/HLasse)
* [#3250](https://github.com/RaRe-Technologies/gensim/pull/3250): Make negative ns_exponent work correctly, by [@menshikh-iv](https://github.com/menshikh-iv)

## 4.1.2, 2021-09-17

4 changes: 2 additions & 2 deletions gensim/models/word2vec.py
@@ -833,11 +833,11 @@ def make_cum_table(self, domain=2**31 - 1):
         train_words_pow = 0.0
         for word_index in range(vocab_size):
             count = self.wv.get_vecattr(word_index, 'count')
-            train_words_pow += count**self.ns_exponent
+            train_words_pow += count**float(self.ns_exponent)
         cumulative = 0.0
         for word_index in range(vocab_size):
             count = self.wv.get_vecattr(word_index, 'count')
-            cumulative += count**self.ns_exponent
+            cumulative += count**float(self.ns_exponent)
             self.cum_table[word_index] = round(cumulative / train_words_pow * domain)
         if len(self.cum_table) > 0:
             assert self.cum_table[-1] == domain
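For context (not part of the diff): the float() cast is what makes a negative exponent usable here. The per-word counts come back from get_vecattr as NumPy integers, and NumPy refuses to raise an integer to a negative integer power, whereas a float exponent works. A minimal repro sketch of the failure and the fix, assuming the count is a NumPy integer (the names below are illustrative):

import numpy as np

count = np.int64(42)   # a word's raw frequency, stored as a NumPy integer
ns_exponent = -1       # negative exponent, the case this commit fixes

# Without the cast, NumPy rejects integer ** negative-integer:
#   count ** ns_exponent
#   -> ValueError: Integers to negative integer powers are not allowed.

# With the cast, exponentiation happens in floating point and succeeds:
weight = count ** float(ns_exponent)
print(weight)          # ~0.0238, i.e. 1/42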
9 changes: 9 additions & 0 deletions gensim/test/test_doc2vec.py
@@ -720,6 +720,15 @@ def test_train_warning(self, loglines):
def test_load_on_class_error(self):
"""Test if exception is raised when loading doc2vec model on instance"""
self.assertRaises(AttributeError, load_on_instance)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = doc2vec.Doc2Vec(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('d2v_negative_exp.tst')
model.save(tmpf)
loaded_model = doc2vec.Doc2Vec.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent
# endclass TestDoc2VecModel


9 changes: 9 additions & 0 deletions gensim/test/test_fasttext.py
@@ -762,6 +762,15 @@ def test_vectors_for_all_without_inference(self):
predicted = vectors_for_all['responding']
assert np.allclose(expected, predicted)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = FT_gensim(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('fasttext_negative_exp.tst')
model.save(tmpf)
loaded_model = FT_gensim.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent


@pytest.mark.parametrize('shrink_windows', [True, False])
def test_cbow_hs_training(shrink_windows):
9 changes: 9 additions & 0 deletions gensim/test/test_word2vec.py
@@ -1054,6 +1054,15 @@ def test_compute_training_loss(self):
training_loss_val = model.get_latest_training_loss()
self.assertTrue(training_loss_val > 0.0)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = word2vec.Word2Vec(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('w2v_negative_exp.tst')
model.save(tmpf)
loaded_model = word2vec.Word2Vec.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent


# endclass TestWord2VecModel
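As background, ns_exponent shapes the negative-sampling distribution: 1.0 samples words in proportion to their frequency, 0.0 samples all words equally, and a negative value samples low-frequency words more often than high-frequency ones, which is the case this commit fixes. A hedged usage sketch (the toy corpus and parameter values are illustrative, not taken from the diff):

from gensim.models import Word2Vec

# Any iterable of tokenised sentences works; this toy corpus is only for illustration.
sentences = [
    ["human", "interface", "computer"],
    ["survey", "user", "computer", "system", "response", "time"],
    ["graph", "minors", "trees"],
]

# A negative ns_exponent now trains without error and biases negative sampling
# towards rarer words (1.0 = proportional to frequency, 0.0 = uniform).
model = Word2Vec(sentences, ns_exponent=-0.5, min_count=1, workers=1, epochs=5)
print(model.wv.most_similar("computer", topn=2))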

