diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py
index fd42f53359..a46414a1a5 100644
--- a/gensim/models/coherencemodel.py
+++ b/gensim/models/coherencemodel.py
@@ -460,9 +460,9 @@ def _relevant_ids_will_differ(self, new_topics):
         return not self._accumulator.relevant_ids.issuperset(new_set)
 
     def _topics_differ(self, new_topics):
-        return (new_topics is not None and
-                self._topics is not None and
-                not np.array_equal(new_topics, self._topics))
+        return (new_topics is not None
+                and self._topics is not None
+                and not np.array_equal(new_topics, self._topics))
 
     def _get_topics(self):
         """Internal helper function to return topics from a trained topic model."""
diff --git a/gensim/models/deprecated/doc2vec.py b/gensim/models/deprecated/doc2vec.py
index 8d8875affe..b841866b93 100644
--- a/gensim/models/deprecated/doc2vec.py
+++ b/gensim/models/deprecated/doc2vec.py
@@ -242,8 +242,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N
     if doctag_locks is None:
         doctag_locks = model.docvecs.doctag_syn0_lockf
 
-    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
-                   model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+                   and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
 
     for pos, word in enumerate(word_vocabs):
         reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
@@ -298,8 +298,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None,
     if doctag_locks is None:
         doctag_locks = model.docvecs.doctag_syn0_lockf
 
-    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
-                   model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+                   and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
     doctag_len = len(doctag_indexes)
     if doctag_len != model.dm_tag_count:
         return 0  # skip doc without expected number of doctag(s) (TODO: warn/pad?)
diff --git a/gensim/models/deprecated/fasttext.py b/gensim/models/deprecated/fasttext.py
index 47e7f1a6a8..836c66d4ca 100644
--- a/gensim/models/deprecated/fasttext.py
+++ b/gensim/models/deprecated/fasttext.py
@@ -148,8 +148,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)
             start = max(0, pos - model.window + reduced_window)
@@ -211,8 +211,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
             # now go over all words from the (reduced) window, predicting each one in turn
diff --git a/gensim/models/deprecated/old_saveload.py b/gensim/models/deprecated/old_saveload.py
index 44f4a5619d..c609dd5532 100644
--- a/gensim/models/deprecated/old_saveload.py
+++ b/gensim/models/deprecated/old_saveload.py
@@ -108,8 +108,8 @@ def _load_specials(self, fname, mmap, compress, subname):
         """
         def mmap_error(obj, filename):
             return IOError(
-                'Cannot mmap compressed object %s in file %s. ' % (obj, filename) +
-                'Use `load(fname, mmap=None)` or uncompress files manually.'
+                'Cannot mmap compressed object %s in file %s. ' % (obj, filename)
+                + 'Use `load(fname, mmap=None)` or uncompress files manually.'
             )
 
         for attrib in getattr(self, '__recursive_saveloads', []):
diff --git a/gensim/models/deprecated/word2vec.py b/gensim/models/deprecated/word2vec.py
index 885d77ba66..d647bfb8f1 100644
--- a/gensim/models/deprecated/word2vec.py
+++ b/gensim/models/deprecated/word2vec.py
@@ -232,8 +232,8 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
 
@@ -263,8 +263,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
             start = max(0, pos - model.window + reduced_window)
diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py
index 6a6b3d3ae9..d9b905cb3b 100644
--- a/gensim/models/doc2vec.py
+++ b/gensim/models/doc2vec.py
@@ -227,8 +227,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N
     if doctag_locks is None:
         doctag_locks = model.docvecs.doctag_syn0_lockf
 
-    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
-                   model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+                   and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
 
     for pos, word in enumerate(word_vocabs):
         reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
@@ -314,8 +314,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None,
     if doctag_locks is None:
         doctag_locks = model.docvecs.doctag_syn0_lockf
 
-    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
-                   model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+    word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+                   and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
     doctag_len = len(doctag_indexes)
     if doctag_len != model.dm_tag_count:
         return 0  # skip doc without expected number of doctag(s) (TODO: warn/pad?)
diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py
index f7e9d65556..17b314fec9 100644
--- a/gensim/models/fasttext.py
+++ b/gensim/models/fasttext.py
@@ -140,8 +140,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)
             start = max(0, pos - model.window + reduced_window)
@@ -199,8 +199,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
             # now go over all words from the (reduced) window, predicting each one in turn
diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py
index 46de2efa25..3c24fb4561 100755
--- a/gensim/models/hdpmodel.py
+++ b/gensim/models/hdpmodel.py
@@ -510,13 +510,13 @@ def update_finished(self, start_time, chunks_processed, docs_processed):
         """
         return (
             # chunk limit reached
-            (self.max_chunks and chunks_processed == self.max_chunks) or
+            (self.max_chunks and chunks_processed == self.max_chunks)
 
             # time limit reached
-            (self.max_time and time.clock() - start_time > self.max_time) or
+            or (self.max_time and time.clock() - start_time > self.max_time)
 
             # no limits and whole corpus has been processed once
-            (not self.max_chunks and not self.max_time and docs_processed >= self.m_D))
+            or (not self.max_chunks and not self.max_time and docs_processed >= self.m_D))
 
     def update_chunk(self, chunk, update=True, opt_o=True):
         """Performs lazy update on necessary columns of lambda and variational inference for documents in the chunk.
diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py
index 248cc83abc..d154e367df 100644
--- a/gensim/models/ldamulticore.py
+++ b/gensim/models/ldamulticore.py
@@ -276,9 +276,9 @@ def process_result_queue(force=False):
             if (force and merged_new and queue_size[0] == 0) or (not self.batch and (other.numdocs >= updateafter)):
                 self.do_mstep(rho(), other, pass_ > 0)
                 other.reset()
-                if self.eval_every is not None and \
-                        ((force and queue_size[0] == 0) or
-                         (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)):
+                if self.eval_every is not None \
+                        and ((force and queue_size[0] == 0)
+                             or (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)):
                     self.log_perplexity(chunk, total_docs=lencorpus)
 
         chunk_stream = utils.grouper(corpus, self.chunksize, as_numpy=chunks_as_numpy)
diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
index 098905420b..bcb5beee20 100755
--- a/gensim/models/word2vec.py
+++ b/gensim/models/word2vec.py
@@ -193,8 +193,8 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False):
     """
     result = 0
     for sentence in sentences:
-        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
-                       model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+                       and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
         for pos, word in enumerate(word_vocabs):
             reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
 
diff --git a/gensim/summarization/mz_entropy.py b/gensim/summarization/mz_entropy.py
index 11437f5c86..492ae7e79a 100644
--- a/gensim/summarization/mz_entropy.py
+++ b/gensim/summarization/mz_entropy.py
@@ -98,9 +98,9 @@ def marginal_prob(n, m):
           occurring m times in a given block"""
 
        return numpy.exp(
-            __log_combinations(n, m) +
-            __log_combinations(n_words - n, blocksize - m) -
-            __log_combinations(n_words, blocksize)
+            __log_combinations(n, m)
+            + __log_combinations(n_words - n, blocksize - m)
+            - __log_combinations(n_words, blocksize)
         )
 
     return numpy.frompyfunc(marginal_prob, 2, 1)
diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py
index d35b907800..d61adef085 100644
--- a/gensim/test/test_doc2vec.py
+++ b/gensim/test/test_doc2vec.py
@@ -340,8 +340,8 @@ def test_similarity_unseen_docs(self):
         model = doc2vec.Doc2Vec(min_count=1)
         model.build_vocab(corpus)
         self.assertTrue(
-            model.docvecs.similarity_unseen_docs(model, rome_str, rome_str) >
-            model.docvecs.similarity_unseen_docs(model, rome_str, car_str)
+            model.docvecs.similarity_unseen_docs(model, rome_str, rome_str)
+            > model.docvecs.similarity_unseen_docs(model, rome_str, car_str)
         )
 
     def model_sanity(self, model, keep_training=True):
diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py
index 927b896c64..0259fea7af 100644
--- a/gensim/test/test_keyedvectors.py
+++ b/gensim/test/test_keyedvectors.py
@@ -38,8 +38,7 @@ def test_similarity_matrix(self):
         similarity_matrix = self.vectors.similarity_matrix(dictionary).todense()
         self.assertTrue((similarity_matrix.T == similarity_matrix).all())
         self.assertTrue(
-            (np.diag(similarity_matrix) ==
-             np.ones(similarity_matrix.shape[0])).all())
+            (np.diag(similarity_matrix) == np.ones(similarity_matrix.shape[0])).all())
 
         # checking that thresholding works as expected
         similarity_matrix = self.vectors.similarity_matrix(dictionary, threshold=0.45).todense()
diff --git a/gensim/utils.py b/gensim/utils.py
index 0359125db5..a5c7c94ab7 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -447,8 +447,8 @@ def _load_specials(self, fname, mmap, compress, subname):
         """
         def mmap_error(obj, filename):
             return IOError(
-                'Cannot mmap compressed object %s in file %s. ' % (obj, filename) +
-                'Use `load(fname, mmap=None)` or uncompress files manually.'
+                'Cannot mmap compressed object %s in file %s. ' % (obj, filename)
+                + 'Use `load(fname, mmap=None)` or uncompress files manually.'
             )
 
         for attrib in getattr(self, '__recursive_saveloads', []):
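
Note on the pattern: nearly every hunk above applies the same mechanical style change, moving a binary operator (`and`, `or`, `+`, `-`, `>`) from the end of a wrapped line to the start of the continuation line (one test hunk instead joins a wrapped comparison onto a single line). This matches PEP 8's current recommendation to break *before* binary operators, the convention pycodestyle distinguishes with its W503/W504 checks. A minimal before/after sketch of the idea, using made-up variable names rather than gensim code:

    # Old style: the trailing operator is easy to miss when scanning the left margin.
    is_ready = (config_loaded and
                cache_warm and
                not shutting_down)

    # New style (as in this diff): each continuation line leads with its operator,
    # so every line reads as a self-contained clause.
    is_ready = (config_loaded
                and cache_warm
                and not shutting_down)

Both forms parse to the same expression, so the diff is purely cosmetic; no hunk changes runtime behavior.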