
Commit

Renamed reduced_windows to shrink_windows.
pandrey-fr committed Jun 10, 2021
1 parent 6a93037 commit f323b68
Showing 3 changed files with 17 additions and 17 deletions.
18 changes: 9 additions & 9 deletions gensim/models/word2vec.py
@@ -240,7 +240,7 @@ def __init__(
max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
- comment=None, max_final_vocab=None, reduced_windows=True,
+ comment=None, max_final_vocab=None, shrink_windows=True,
):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
@@ -345,7 +345,7 @@ def __init__(
:meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional
Sequence of callbacks to be executed at specific stages during training.
- reduced_windows : bool, optional
+ shrink_windows : bool, optional
If True, the effective window size is uniformly sampled from [1, `window`]
for each target word during training, to match the original word2vec algorithm's
approximate weighting of context words by distance. Otherwise, the effective
@@ -382,7 +382,7 @@ def __init__(
self.min_alpha = float(min_alpha)

self.window = int(window)
- self.reduced_windows = bool(reduced_windows)
+ self.shrink_windows = bool(shrink_windows)
self.random = np.random.RandomState(seed)

self.hs = int(hs)
@@ -426,7 +426,7 @@ def __init__(
corpus_iterable=corpus_iterable, corpus_file=corpus_file, total_examples=self.corpus_count,
total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha,
end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks,
- reduced_windows=self.reduced_windows)
+ shrink_windows=self.shrink_windows)
else:
if trim_rule is not None:
logger.warning(
@@ -969,7 +969,7 @@ def train(
self, corpus_iterable=None, corpus_file=None, total_examples=None,
total_words=None, epochs=None, start_alpha=None, end_alpha=None,
word_count=0, queue_factor=2, report_delay=1.0, compute_loss=False,
- reduced_windows=None, callbacks=(), **kwargs,
+ shrink_windows=None, callbacks=(), **kwargs,
):
"""Update the model's neural weights from a sequence of sentences.
@@ -1026,7 +1026,7 @@ def train(
compute_loss: bool, optional
If True, computes and stores loss value which can be retrieved using
:meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`.
- reduced_windows : bool, optional
+ shrink_windows : bool, optional
If True, the effective window size is uniformly sampled from [1, `window`]
for each target word during training, to match the original word2vec algorithm's
approximate weighting of context words by distance. Otherwise, the effective
@@ -1050,8 +1050,8 @@ def train(
self.alpha = start_alpha or self.alpha
self.min_alpha = end_alpha or self.min_alpha
self.epochs = epochs
- if reduced_windows is not None:
-     self.reduced_windows = bool(reduced_windows)
+ if shrink_windows is not None:
+     self.shrink_windows = bool(shrink_windows)

self._check_training_sanity(epochs=epochs, total_examples=total_examples, total_words=total_words)
self._check_corpus_sanity(corpus_iterable=corpus_iterable, corpus_file=corpus_file, passes=epochs)
@@ -1061,7 +1061,7 @@ def train(
msg=(
f"training model with {self.workers} workers on {len(self.wv)} vocabulary and "
f"{self.layer1_size} features, using sg={self.sg} hs={self.hs} sample={self.sample} "
f"negative={self.negative} window={self.window} reduced_windows={self.reduced_windows}"
f"negative={self.negative} window={self.window} shrink_windows={self.shrink_windows}"
),
)

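For context, shrink_windows is the keyword renamed throughout the constructor and train() hunks above. A minimal constructor-side usage sketch, assuming gensim at this commit; the toy corpus and hyperparameter values are illustrative only, not taken from the commit:

    from gensim.models import Word2Vec

    sentences = [
        ["human", "interface", "computer"],
        ["survey", "user", "computer", "system", "response", "time"],
    ]

    # shrink_windows=True (the default) keeps the original word2vec behaviour:
    # each target word gets an effective window drawn uniformly from [1, window].
    model = Word2Vec(sentences, vector_size=32, window=5, min_count=1, shrink_windows=True)

    # shrink_windows=False always uses the full, fixed window around each target word.
    model_fixed = Word2Vec(sentences, vector_size=32, window=5, min_count=1, shrink_windows=False)
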
12 changes: 6 additions & 6 deletions gensim/models/word2vec_corpusfile.pyx
@@ -187,7 +187,7 @@ cdef void prepare_c_structures_for_batch(
int *effective_words, int *effective_sentences, unsigned long long *next_random,
cvocab_t *vocab, int *sentence_idx, np.uint32_t *indexes, int *codelens,
np.uint8_t **codes, np.uint32_t **points, np.uint32_t *reduced_windows,
- int do_reduced_windows) nogil:
+ int shrink_windows) nogil:
cdef VocabItem word
cdef string token
cdef vector[string] sent
@@ -226,7 +226,7 @@ cdef void prepare_c_structures_for_batch(

# precompute "reduced window" offsets in a single randint() call
for i in range(effective_words[0]):
- if do_reduced_windows:
+ if shrink_windows:
reduced_windows[i] = random_int32(next_random) % window
else:
reduced_windows[i] = 0
@@ -299,7 +299,7 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
cdef long long total_sentences = 0
cdef long long total_effective_words = 0, total_words = 0
cdef int sent_idx, idx_start, idx_end
- cdef int do_reduced_windows = int(model.reduced_windows)
+ cdef int shrink_windows = int(model.shrink_windows)

init_w2v_config(&c, model, _alpha, compute_loss, _work)

@@ -316,7 +316,7 @@ def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expec
prepare_c_structures_for_batch(
sentences, c.sample, c.hs, c.window, &total_words, &effective_words, &effective_sentences,
&c.next_random, vocab.get_vocab_ptr(), c.sentence_idx, c.indexes,
- c.codelens, c.codes, c.points, c.reduced_windows, do_reduced_windows)
+ c.codelens, c.codes, c.points, c.reduced_windows, shrink_windows)

for sent_idx in range(effective_sentences):
idx_start = c.sentence_idx[sent_idx]
@@ -400,7 +400,7 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
cdef long long total_sentences = 0
cdef long long total_effective_words = 0, total_words = 0
cdef int sent_idx, idx_start, idx_end
- cdef int do_reduced_windows = int(model.reduced_windows)
+ cdef int shrink_windows = int(model.shrink_windows)

init_w2v_config(&c, model, _alpha, compute_loss, _work, _neu1)

@@ -417,7 +417,7 @@ def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _exp
prepare_c_structures_for_batch(
sentences, c.sample, c.hs, c.window, &total_words, &effective_words,
&effective_sentences, &c.next_random, vocab.get_vocab_ptr(), c.sentence_idx,
- c.indexes, c.codelens, c.codes, c.points, c.reduced_windows, do_reduced_windows)
+ c.indexes, c.codelens, c.codes, c.points, c.reduced_windows, shrink_windows)

for sent_idx in range(effective_sentences):
idx_start = c.sentence_idx[sent_idx]
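The corpus-file path above precomputes one window reduction per effective word; word2vec_inner.pyx (next file) does the same with a vectorised randint call. A pure-Python sketch of that sampling logic, written here only to illustrate the semantics (the function and variable names are ours, not gensim's):

    import numpy as np

    def sample_window_reductions(n_words, window, shrink_windows, rng=None):
        """Mirror the per-batch 'reduced window' precomputation.

        With shrink_windows=True, word i is trained with an effective window of
        window - reductions[i], i.e. a value drawn uniformly from [1, window].
        With shrink_windows=False, the reduction is 0 and the full window is used.
        """
        rng = rng or np.random.RandomState(0)
        if shrink_windows:
            return rng.randint(0, window, n_words)  # offsets in [0, window)
        return np.zeros(n_words, dtype=np.uint32)
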
4 changes: 2 additions & 2 deletions gensim/models/word2vec_inner.pyx
@@ -570,7 +570,7 @@ def train_batch_sg(model, sentences, alpha, _work, compute_loss):
break # TODO: log warning, tally overflow?

# precompute "reduced window" offsets in a single randint() call
- if model.reduced_windows:
+ if model.shrink_windows:
for i, item in enumerate(model.random.randint(0, c.window, effective_words)):
c.reduced_windows[i] = item
else:
@@ -669,7 +669,7 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1, compute_loss):
break # TODO: log warning, tally overflow?

# precompute "reduced window" offsets in a single randint() call
- if model.reduced_windows:
+ if model.shrink_windows:
for i, item in enumerate(model.random.randint(0, c.window, effective_words)):
c.reduced_windows[i] = item
else:
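The train() hunks in word2vec.py also allow the setting to be changed after construction: shrink_windows=None (the default) leaves model.shrink_windows untouched, while an explicit boolean overrides it for the training call. A sketch of that override, reusing the illustrative `sentences` corpus from the first example:

    from gensim.models import Word2Vec

    model = Word2Vec(vector_size=32, window=5, min_count=1)   # build without training
    model.build_vocab(corpus_iterable=sentences)              # `sentences` as in the sketch above
    model.train(
        corpus_iterable=sentences,
        total_examples=model.corpus_count,
        epochs=model.epochs,
        shrink_windows=False,   # overrides the value set at construction time
    )
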

