Commit

Make negative ns_exponent work correctly (piskvorky#3250)
* add tests with negative ns_exponent

* fix flake8

* explicitly cast ns_exponent to FLOAT

* Apply suggestions from code review

* dynamic cast

* Update CHANGELOG.md

* Update CHANGELOG.md

Co-authored-by: Michael Penkov <m@penkov.dev>
menshikh-iv and mpenkov authored Oct 28, 2021
1 parent e51288c commit 6e36266
Showing 5 changed files with 30 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@ Changes
* [#3194](https://github.com/RaRe-Technologies/gensim/pull/3194): Added random_seed parameter to make LsiModel reproducible, by [@parashardhapola](https://github.com/parashardhapola)
* [#3251](https://github.com/RaRe-Technologies/gensim/pull/3251): Apply new convention of delimiting instance params in str function, by [@menshikh-iv](https://github.com/menshikh-iv)
* [#3227](https://github.com/RaRe-Technologies/gensim/pull/3227): Fix FastText doc-comment example for `build_vocab` and `train` to use correct argument names, by [@HLasse](https://github.com/HLasse)
* [#3250](https://github.com/RaRe-Technologies/gensim/pull/3250): Make negative ns_exponent work correctly, by [@menshikh-iv](https://github.com/menshikh-iv)

## 4.1.2, 2021-09-17

4 changes: 2 additions & 2 deletions gensim/models/word2vec.py
@@ -833,11 +833,11 @@ def make_cum_table(self, domain=2**31 - 1):
         train_words_pow = 0.0
         for word_index in range(vocab_size):
             count = self.wv.get_vecattr(word_index, 'count')
-            train_words_pow += count**self.ns_exponent
+            train_words_pow += count**float(self.ns_exponent)
         cumulative = 0.0
         for word_index in range(vocab_size):
             count = self.wv.get_vecattr(word_index, 'count')
-            cumulative += count**self.ns_exponent
+            cumulative += count**float(self.ns_exponent)
             self.cum_table[word_index] = round(cumulative / train_words_pow * domain)
         if len(self.cum_table) > 0:
             assert self.cum_table[-1] == domain
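For context (not part of the diff): the float() cast is what makes a negative exponent usable here. The per-word counts come back from get_vecattr as NumPy integers, and NumPy refuses to raise an integer to a negative integer power, whereas a float exponent works. A minimal repro sketch of the failure and the fix, assuming the count is a NumPy integer (the names below are illustrative):

import numpy as np

count = np.int64(42)   # a word's raw frequency, stored as a NumPy integer
ns_exponent = -1       # negative exponent, the case this commit fixes

# Without the cast, NumPy rejects integer ** negative-integer:
#   count ** ns_exponent
#   -> ValueError: Integers to negative integer powers are not allowed.

# With the cast, exponentiation happens in floating point and succeeds:
weight = count ** float(ns_exponent)
print(weight)          # ~0.0238, i.e. 1/42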
9 changes: 9 additions & 0 deletions gensim/test/test_doc2vec.py
@@ -720,6 +720,15 @@ def test_train_warning(self, loglines):
def test_load_on_class_error(self):
"""Test if exception is raised when loading doc2vec model on instance"""
self.assertRaises(AttributeError, load_on_instance)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = doc2vec.Doc2Vec(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('d2v_negative_exp.tst')
model.save(tmpf)
loaded_model = doc2vec.Doc2Vec.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent
# endclass TestDoc2VecModel


9 changes: 9 additions & 0 deletions gensim/test/test_fasttext.py
@@ -762,6 +762,15 @@ def test_vectors_for_all_without_inference(self):
predicted = vectors_for_all['responding']
assert np.allclose(expected, predicted)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = FT_gensim(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('fasttext_negative_exp.tst')
model.save(tmpf)
loaded_model = FT_gensim.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent


@pytest.mark.parametrize('shrink_windows', [True, False])
def test_cbow_hs_training(shrink_windows):
9 changes: 9 additions & 0 deletions gensim/test/test_word2vec.py
@@ -1054,6 +1054,15 @@ def test_compute_training_loss(self):
training_loss_val = model.get_latest_training_loss()
self.assertTrue(training_loss_val > 0.0)

def test_negative_ns_exp(self):
"""The model should accept a negative ns_exponent as a valid value."""
model = word2vec.Word2Vec(sentences, ns_exponent=-1, min_count=1, workers=1)
tmpf = get_tmpfile('w2v_negative_exp.tst')
model.save(tmpf)
loaded_model = word2vec.Word2Vec.load(tmpf)
loaded_model.train(sentences, total_examples=model.corpus_count, epochs=1)
assert loaded_model.ns_exponent == -1, loaded_model.ns_exponent


# endclass TestWord2VecModel
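As background, ns_exponent shapes the negative-sampling distribution: 1.0 samples words in proportion to their frequency, 0.0 samples all words equally, and a negative value samples low-frequency words more often than high-frequency ones, which is the case this commit fixes. A hedged usage sketch (the toy corpus and parameter values are illustrative, not taken from the diff):

from gensim.models import Word2Vec

# Any iterable of tokenised sentences works; this toy corpus is only for illustration.
sentences = [
    ["human", "interface", "computer"],
    ["survey", "user", "computer", "system", "response", "time"],
    ["graph", "minors", "trees"],
]

# A negative ns_exponent now trains without error and biases negative sampling
# towards rarer words (1.0 = proportional to frequency, 0.0 = uniform).
model = Word2Vec(sentences, ns_exponent=-0.5, min_count=1, workers=1, epochs=5)
print(model.wv.most_similar("computer", topn=2))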

