Skip to content

Commit

Permalink
Eliminate obsolete step parameter from doc2vec infer_vector and similarity_unseen_docs (#3176)
Browse files Browse the repository at this point in the history

* Fix: eliminate step params

* Fix: typo in doc2vec.infer_vector() documentation

* Update CHANGELOG.md

Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
rock420 and mpenkov authored Jun 29, 2021
1 parent bdcd100 commit d59a241
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ Changes
### :red_circle: Bug fixes

* [#3116](https://github.com/RaRe-Technologies/gensim/pull/3116): Fix bug where saved Phrases model did not load its connector_words, by [@aloknayak29](https://github.com/aloknayak29)
* [#3136](https://github.com/RaRe-Technologies/gensim/pull/3136): fix indexing error in word2vec_inner.pyx, by [@bluekura](https://github.com/bluekura)
* [#3136](https://github.com/RaRe-Technologies/gensim/pull/3136): Fix indexing error in word2vec_inner.pyx, by [@bluekura](https://github.com/bluekura)
* [#3174](https://github.com/RaRe-Technologies/gensim/pull/3174): Fix a bug when upgrading phraser from gensim 3.x to 4.0, by [@emgucv](https://github.com/emgucv)
* [#3176](https://github.com/RaRe-Technologies/gensim/pull/3176): Eliminate obsolete step parameter from doc2vec infer_vector and similarity_unseen_docs, by [@rock420](https://github.com/rock420)

### :+1: Improvements

Expand Down
12 changes: 6 additions & 6 deletions gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,13 +581,13 @@ def estimated_lookup_memory(self):
"""
return 60 * len(self.dv) + 140 * len(self.dv)

def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None, steps=None):
def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None):
"""Infer a vector for given post-bulk training document.
Notes
-----
Subsequent calls to this function may infer different representations for the same document.
For a more stable representation, increase the number of steps to assert a stricket convergence.
For a more stable representation, increase the number of epochs to assert a stricter convergence.
Parameters
----------
Expand Down Expand Up @@ -1047,7 +1047,7 @@ def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000,

return total_words, corpus_count

def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, steps=None):
def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, epochs=None):
"""Compute cosine similarity between two post-bulk out of training documents.
Parameters
Expand All @@ -1062,7 +1062,7 @@ def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=N
The initial learning rate.
min_alpha : float, optional
Learning rate will linearly drop to `min_alpha` as training progresses.
steps : int, optional
epochs : int, optional
Number of epochs to train the new document.
Returns
Expand All @@ -1071,8 +1071,8 @@ def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=N
The cosine similarity between `doc_words1` and `doc_words2`.
"""
d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, steps=steps)
d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, steps=steps)
d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, epochs=epochs)
d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, epochs=epochs)
return np.dot(matutils.unitvec(d1), matutils.unitvec(d2))


Expand Down
4 changes: 2 additions & 2 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,8 +718,8 @@ def __str__(self):
def epochs(self):
return self.models[0].epochs

def infer_vector(self, document, alpha=None, min_alpha=None, epochs=None, steps=None):
return np.concatenate([model.infer_vector(document, alpha, min_alpha, epochs, steps) for model in self.models])
def infer_vector(self, document, alpha=None, min_alpha=None, epochs=None):
return np.concatenate([model.infer_vector(document, alpha, min_alpha, epochs) for model in self.models])

def train(self, *ignore_args, **ignore_kwargs):
pass # train subcomponents individually
Expand Down

0 comments on commit d59a241

Please sign in to comment.