From 526d6f96cc7ba201106ab106a73a2d0c6e521bc1 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 1 May 2019 12:59:03 -0700 Subject: [PATCH 1/3] Fix misleading Doc2Vec.docvecs comment Existing doc-comment was confused & misleading, implying `Doc2Vec` handles word-senses by giving single word tokens different word-vectors in different contexts. (At least one user has been confused by this.) `Doc2Vec` doesn't do that, so this changes the comment to be matter-of-fact about accessing vectors via `.docvecs`. --- gensim/models/doc2vec.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 9812dc5ef4..1cf070b05d 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -447,18 +447,13 @@ class Doc2Vec(BaseWordEmbeddingsModel): directly to query those embeddings in various ways. See the module level docstring for examples. docvecs : :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` - This object contains the paragraph vectors. Remember that the only difference between this model and - :class:`~gensim.models.word2vec.Word2Vec` is that besides the word vectors we also include paragraph embeddings - to capture the paragraph. - - In this way we can capture the difference between the same word used in a different context. - For example we now have a different representation of the word "leaves" in the following two sentences :: - - 1. Manos leaves the office every day at 18:00 to catch his train - 2. This season is called Fall, because leaves fall from the trees. + This object contains the paragraph vectors learned from the training data. There will be one such vector + for each unique document tag supplied during training. They may be individually accessed using the tag + as an indexed-access key. For example, if one of the training documents used a tag of 'doc003': + + .. 
sourcecode:: pycon - In a plain :class:`~gensim.models.word2vec.Word2Vec` model the word would have exactly the same representation - in both sentences, in :class:`~gensim.models.doc2vec.Doc2Vec` it will not. + >>> model.docvecs['doc003'] vocabulary : :class:`~gensim.models.doc2vec.Doc2VecVocab` This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. From 6713ba9bb2b6daf242c4dd293cbb1aeb4f60321f Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Thu, 2 May 2019 11:30:59 -0700 Subject: [PATCH 2/3] rm trailing whitespace --- gensim/models/doc2vec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 1cf070b05d..4efb3ae2d0 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -447,8 +447,8 @@ class Doc2Vec(BaseWordEmbeddingsModel): directly to query those embeddings in various ways. See the module level docstring for examples. docvecs : :class:`~gensim.models.keyedvectors.Doc2VecKeyedVectors` - This object contains the paragraph vectors learned from the training data. There will be one such vector - for each unique document tag supplied during training. They may be individually accessed using the tag + This object contains the paragraph vectors learned from the training data. There will be one such vector + for each unique document tag supplied during training. They may be individually accessed using the tag as an indexed-access key. For example, if one of the training documents used a tag of 'doc003': .. 
sourcecode:: pycon From 96a425d77f3607d4c02702ade630349a2c0245a9 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Thu, 2 May 2019 18:32:49 -0700 Subject: [PATCH 3/3] rm more whitespace --- gensim/models/doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 4efb3ae2d0..eff6a6ed37 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -450,7 +450,7 @@ class Doc2Vec(BaseWordEmbeddingsModel): This object contains the paragraph vectors learned from the training data. There will be one such vector for each unique document tag supplied during training. They may be individually accessed using the tag as an indexed-access key. For example, if one of the training documents used a tag of 'doc003': - + .. sourcecode:: pycon >>> model.docvecs['doc003']