From 84b7b181e80fd181465456127cefb8753804e9b0 Mon Sep 17 00:00:00 2001 From: Ayan Date: Fri, 18 Jun 2021 11:36:26 +0530 Subject: [PATCH 1/3] Fix: eliminate step params --- gensim/models/doc2vec.py | 12 ++++++------ gensim/test/test_doc2vec.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 23f7d9fc7e..c30d14b5bc 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -581,13 +581,13 @@ def estimated_lookup_memory(self): """ return 60 * len(self.dv) + 140 * len(self.dv) - def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None, steps=None): + def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None): """Infer a vector for given post-bulk training document. Notes ----- Subsequent calls to this function may infer different representations for the same document. - For a more stable representation, increase the number of steps to assert a stricket convergence. + For a more stable representation, increase the number of epochs to assert a stricket convergence. Parameters ---------- @@ -1047,7 +1047,7 @@ def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000, return total_words, corpus_count - def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, steps=None): + def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, epochs=None): """Compute cosine similarity between two post-bulk out of training documents. Parameters @@ -1062,7 +1062,7 @@ def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=N The initial learning rate. min_alpha : float, optional Learning rate will linearly drop to `min_alpha` as training progresses. - steps : int, optional + epochs : int, optional Number of epoch to train the new document. 
Returns @@ -1071,8 +1071,8 @@ def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=N The cosine similarity between `doc_words1` and `doc_words2`. """ - d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, steps=steps) - d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, steps=steps) + d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, epochs=epochs) + d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, epochs=epochs) return np.dot(matutils.unitvec(d1), matutils.unitvec(d2)) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 60c9158744..d532e7f494 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -718,8 +718,8 @@ def __str__(self): def epochs(self): return self.models[0].epochs - def infer_vector(self, document, alpha=None, min_alpha=None, epochs=None, steps=None): - return np.concatenate([model.infer_vector(document, alpha, min_alpha, epochs, steps) for model in self.models]) + def infer_vector(self, document, alpha=None, min_alpha=None, epochs=None): + return np.concatenate([model.infer_vector(document, alpha, min_alpha, epochs) for model in self.models]) def train(self, *ignore_args, **ignore_kwargs): pass # train subcomponents individually From c0668fdbf4b3727f7d1b539093b884323542f63a Mon Sep 17 00:00:00 2001 From: Ayan Date: Fri, 18 Jun 2021 12:37:07 +0530 Subject: [PATCH 2/3] Fix: typo in doc2vec.infer_vector() documentation --- gensim/models/doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index c30d14b5bc..19b377242b 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -587,7 +587,7 @@ def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None): Notes ----- Subsequent calls to this function may infer different representations for the same document. 
- For a more stable representation, increase the number of epochs to assert a stricket convergence. + For a more stable representation, increase the number of epochs to assert a stricter convergence. Parameters ---------- From 2157afa8936618c14d88cb3ea64e387be3c5b396 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 29 Jun 2021 09:55:19 +0900 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 423ce2b389..7e8b272287 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ Changes ### :red_circle: Bug fixes * [#3116](https://github.com/RaRe-Technologies/gensim/pull/3116): Fix bug where saved Phrases model did not load its connector_words, by [@aloknayak29](https://github.com/aloknayak29) -* [#3136](https://github.com/RaRe-Technologies/gensim/pull/3136): fix indexing error in word2vec_inner.pyx, by [@bluekura](https://github.com/bluekura) +* [#3136](https://github.com/RaRe-Technologies/gensim/pull/3136): Fix indexing error in word2vec_inner.pyx, by [@bluekura](https://github.com/bluekura) * [#3174](https://github.com/RaRe-Technologies/gensim/pull/3174): Fix a bug when upgrading phraser from gensim 3.x to 4.0, by [@emgucv](https://github.com/emgucv) +* [#3176](https://github.com/RaRe-Technologies/gensim/pull/3176): Eliminate obsolete steps parameter from doc2vec infer_vector and similarity_unseen_docs, by [@rock420](https://github.com/rock420) ### :+1: Improvements