From 58b8cc9fcc8345d75b722a2b1e950ad28a77008f Mon Sep 17 00:00:00 2001 From: Joao Moreira <13685125+jagmoreira@users.noreply.github.com> Date: Sat, 6 Oct 2018 17:00:47 -0500 Subject: [PATCH 1/3] Update documentation for WmdSimilarity. --- gensim/similarities/docsim.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index d033eb4fc7..8ab00b9533 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -826,7 +826,7 @@ def get_similarities(self, query): Parameters ---------- - query : {list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` + query : list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` Document or collection of documents. Return @@ -938,7 +938,7 @@ def get_similarities(self, query): Parameters ---------- - query : {list of (int, number), iterable of list of (int, number) + query : list of (int, number), iterable of list of (int, number) Document or collection of documents. Return @@ -1018,8 +1018,8 @@ def __init__(self, corpus, w2v_model, num_best=None, normalize_w2v_and_replace=T Parameters ---------- - corpus: iterable of list of (int, float) - A list of documents in the BoW format. + corpus: iterable of list of str + A list of documents, each of which is a list of tokens. w2v_model: :class:`~gensim.models.word2vec.Word2VecTrainables` A trained word2vec model. num_best: int, optional @@ -1058,7 +1058,7 @@ def get_similarities(self, query): Parameters ---------- - query : {list of (int, number), iterable of list of (int, number) + query : list of str, iterable of list of str Document or collection of documents. Return @@ -1194,7 +1194,7 @@ def get_similarities(self, query): Parameters ---------- - query : {list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` + query : list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` Document or collection of documents. Return From b7179ab761122231b8da823a3e13feca1d7bf043 Mon Sep 17 00:00:00 2001 From: Joao Moreira <13685125+jagmoreira@users.noreply.github.com> Date: Sat, 6 Oct 2018 17:09:39 -0500 Subject: [PATCH 2/3] Fix curly braces issue. --- gensim/similarities/docsim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 8ab00b9533..99c12d044b 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -826,7 +826,7 @@ def get_similarities(self, query): Parameters ---------- - query : list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` + query : {list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix`} Document or collection of documents. Return @@ -938,7 +938,7 @@ def get_similarities(self, query): Parameters ---------- - query : list of (int, number), iterable of list of (int, number) + query : {list of (int, number), iterable of list of (int, number)} Document or collection of documents. Return @@ -1058,7 +1058,7 @@ def get_similarities(self, query): Parameters ---------- - query : list of str, iterable of list of str + query : {list of str, iterable of list of str} Document or collection of documents. Return @@ -1194,7 +1194,7 @@ def get_similarities(self, query): Parameters ---------- - query : list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix` + query : {list of (int, number), iterable of list of (int, number), :class:`scipy.sparse.csr_matrix`} Document or collection of documents. Return From e775c25c08d2e443aaa45b43ddacd15ad2b922b8 Mon Sep 17 00:00:00 2001 From: Joao Moreira <13685125+jagmoreira@users.noreply.github.com> Date: Sun, 7 Oct 2018 08:58:04 -0500 Subject: [PATCH 3/3] Fix WmdSimilarity docstring example. --- gensim/similarities/docsim.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 99c12d044b..e301486eb3 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -978,7 +978,7 @@ def __str__(self): class WmdSimilarity(interfaces.SimilarityABC): - """Compute negative WMD similarity against a corpus of documents by storing the index matrix in memory. + """Compute negative WMD similarity against a corpus of documents. See :class:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors` for more information. Also, tutorial `notebook @@ -999,17 +999,14 @@ class WmdSimilarity(interfaces.SimilarityABC): .. sourcecode:: pycon >>> from gensim.test.utils import common_texts - >>> from gensim.corpora import Dictionary >>> from gensim.models import Word2Vec >>> from gensim.similarities import WmdSimilarity >>> >>> model = Word2Vec(common_texts, size=20, min_count=1) # train word-vectors - >>> dictionary = Dictionary(common_texts) - >>> bow_corpus = [dictionary.doc2bow(document) for document in common_texts] >>> - >>> index = WmdSimilarity(bow_corpus, model) + >>> index = WmdSimilarity(common_texts, model) >>> # Make query. - >>> query = 'trees' + >>> query = ['trees'] >>> sims = index[query] """