From ceb24ab60932b0e691868da50d1a03c6333255e7 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Sun, 26 Nov 2017 17:45:35 +0530 Subject: [PATCH 01/22] adds fasttext extension to setup --- MANIFEST.in | 2 ++ setup.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index d7be1e386e..b6f66a9195 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,3 +10,5 @@ include gensim/models/word2vec_inner.pyx include gensim/models/word2vec_inner.pxd include gensim/models/doc2vec_inner.c include gensim/models/doc2vec_inner.pyx +include gensim/models/fasttext_inner.c +include gensim/models/fasttext_inner.pyx diff --git a/setup.py b/setup.py index 503cbac485..5644a25e25 100644 --- a/setup.py +++ b/setup.py @@ -254,6 +254,9 @@ def finalize_options(self): include_dirs=[model_dir]), Extension('gensim.models.doc2vec_inner', sources=['./gensim/models/doc2vec_inner.c'], + include_dirs=[model_dir]), + Extension('gensim.models.fasttext_inner', + sources=['./gensim/models/fasttext_inner.c'], include_dirs=[model_dir]) ], cmdclass=cmdclass, From fa3aa4991aad469d23304dbd99b74d0537e0748a Mon Sep 17 00:00:00 2001 From: manneshiva Date: Sun, 26 Nov 2017 17:58:28 +0530 Subject: [PATCH 02/22] cythonizes training using skipgram with negative sampling --- gensim/models/fasttext.py | 181 +++++++++++------------ gensim/models/fasttext_inner.pyx | 246 +++++++++++++++++++++++++++++++ 2 files changed, 332 insertions(+), 95 deletions(-) create mode 100644 gensim/models/fasttext_inner.pyx diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 6174754314..a72c93322f 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -9,77 +9,85 @@ from gensim.models.word2vec import Word2Vec, train_sg_pair, train_cbow_pair from gensim.models.wrappers.fasttext import FastTextKeyedVectors from gensim.models.wrappers.fasttext import FastText as Ft_Wrapper, compute_ngrams, ft_hash +from gensim import matutils logger = logging.getLogger(__name__) -MAX_WORDS_IN_BATCH = 10000 - - -def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) - start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - - word2_subwords = [] - vocab_subwords_indices = [] - ngrams_subwords_indices = [] - - for index in word2_indices: - vocab_subwords_indices += [index] - word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]] - - for subword in word2_subwords: - ngrams_subwords_indices.append(model.wv.ngrams[subword]) - - l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size - l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size - - l1 = np_sum([l1_vocab, l1_ngrams], axis=0) - subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices] - if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean: - l1 /= (len(subwords_indices[0]) + len(subwords_indices[1])) - - # train on the sliding window for target word - train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True) - result += len(word_vocabs) - return result - - -def train_batch_sg(model, sentences, alpha, work=None): - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) # `b` in the original word2vec code - # now go over all words from the (reduced) window, predicting each one in turn - start = max(0, pos - model.window + reduced_window) - - subwords_indices = [word.index] - word2_subwords = model.wv.ngrams_word[model.wv.index2word[word.index]] - - for subword in word2_subwords: - subwords_indices.append(model.wv.ngrams[subword]) - - for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): - if pos2 != pos: # don't train on the `word` itself - train_sg_pair(model, model.wv.index2word[word2.index], subwords_indices, alpha, is_ft=True) - - result += len(word_vocabs) - return result +try: + # TODO : log FAST_VERSION + from gensim.models.fasttext_inner import train_batch_sg + from gensim.models.fasttext_inner import FAST_VERSION, MAX_WORDS_IN_BATCH + +except ImportError: + # why falling back - log + # failed... fall back to plain numpy (20-80x slower training than the above) + FAST_VERSION = -1 + MAX_WORDS_IN_BATCH = 10000 + + def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): + result = 0 + for sentence in sentences: + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and + model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + for pos, word in enumerate(word_vocabs): + reduced_window = model.random.randint(model.window) + start = max(0, pos - model.window + reduced_window) + window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) + word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] + + word2_subwords = [] + vocab_subwords_indices = [] + ngrams_subwords_indices = [] + + for index in word2_indices: + vocab_subwords_indices += [index] + word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]] + + for subword in word2_subwords: + ngrams_subwords_indices.append(model.wv.ngrams[subword]) + + l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size + l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size + + l1 = np_sum([l1_vocab, l1_ngrams], axis=0) + subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices] + if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean: + l1 /= (len(subwords_indices[0]) + len(subwords_indices[1])) + + train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True) # train on the sliding window for target word + result += len(word_vocabs) + return result + + def train_batch_sg(model, sentences, alpha, work=None, neu1=None): + result = 0 + for sentence in sentences: + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and + model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + for pos, word in enumerate(word_vocabs): + reduced_window = model.random.randint(model.window) # `b` in the original word2vec code + # now go over all words from the (reduced) window, predicting each one in turn + start = max(0, pos - model.window + reduced_window) + + subwords_indices = [word.index] + word2_subwords = model.wv.ngrams_word[model.wv.index2word[word.index]] + + for subword in word2_subwords: + subwords_indices.append(model.wv.ngrams[subword]) + + for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): + if pos2 != pos: # don't train on the `word` itself + train_sg_pair(model, model.wv.index2word[word2.index], subwords_indices, alpha, is_ft=True) + + result += len(word_vocabs) + return result class FastText(Word2Vec): - def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, word_ngrams=1, loss='ns', sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, - sorted_vocab=1, bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): + def __init__( + self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, + max_vocab_size=None, word_ngrams=1, loss='ns', sample=1e-3, seed=1, workers=3, min_alpha=0.0001, + negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, + trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): # fastText specific params self.bucket = bucket @@ -89,12 +97,10 @@ def __init__(self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, if self.word_ngrams <= 1 and self.max_n == 0: self.bucket = 0 - super(FastText, self).__init__( - sentences=sentences, size=size, alpha=alpha, window=window, min_count=min_count, + super(FastText, self).__init__(sentences=sentences, size=size, alpha=alpha, window=window, min_count=min_count, max_vocab_size=max_vocab_size, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, sg=sg, hs=hs, negative=negative, cbow_mean=cbow_mean, hashfxn=hashfxn, iter=iter, null_word=null_word, - trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words - ) + trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words) def initialize_word_vectors(self): self.wv = FastTextKeyedVectors() @@ -104,16 +110,13 @@ def initialize_word_vectors(self): def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): if update: if not len(self.wv.vocab): - raise RuntimeError( - "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " - "First build the vocabulary of your model with a corpus before doing an online update." - ) + raise RuntimeError("You cannot do an online vocabulary-update of a model which has no prior vocabulary. " + "First build the vocabulary of your model with a corpus " + "before doing an online update.") self.old_vocab_len = len(self.wv.vocab) self.old_hash2index_len = len(self.wv.hash2index) - super(FastText, self).build_vocab( - sentences, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, progress_per=progress_per, update=update - ) + super(FastText, self).build_vocab(sentences, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, progress_per=progress_per, update=update) self.init_ngrams(update=update) def init_ngrams(self, update=False): @@ -170,18 +173,10 @@ def init_ngrams(self, update=False): rand_obj = np.random rand_obj.seed(self.seed) - new_vocab_rows = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, - (len(self.wv.vocab) - self.old_vocab_len, self.vector_size) - ) + new_vocab_rows = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, (len(self.wv.vocab) - self.old_vocab_len, self.vector_size)) new_vocab_lockf_rows = ones((len(self.wv.vocab) - self.old_vocab_len, self.vector_size), dtype=REAL) - new_ngram_rows = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, - (len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size) - ) - new_ngram_lockf_rows = ones( - (len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size), dtype=REAL - ) + new_ngram_rows = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, (len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size)) + new_ngram_lockf_rows = ones((len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size), dtype=REAL) self.wv.syn0_vocab = vstack([self.wv.syn0_vocab, new_vocab_rows]) self.syn0_vocab_lockf = vstack([self.syn0_vocab_lockf, new_vocab_lockf_rows]) @@ -192,19 +187,15 @@ def reset_ngram_weights(self): rand_obj = np.random rand_obj.seed(self.seed) for index in range(len(self.wv.vocab)): - self.wv.syn0_vocab[index] = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size - ) + self.wv.syn0_vocab[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size) for index in range(len(self.wv.hash2index)): - self.wv.syn0_ngrams[index] = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size - ) + self.wv.syn0_ngrams[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size) def _do_train_job(self, sentences, alpha, inits): work, neu1 = inits tally = 0 if self.sg: - tally += train_batch_sg(self, sentences, alpha, work) + tally += train_batch_sg(self, sentences, alpha, work, neu1) else: tally += train_batch_cbow(self, sentences, alpha, work, neu1) @@ -245,4 +236,4 @@ def load_fasttext_format(cls, *args, **kwargs): def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) - super(FastText, self).save(*args, **kwargs) + super(FastText, self).save(*args, **kwargs) \ No newline at end of file diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx new file mode 100644 index 0000000000..252a3958e5 --- /dev/null +++ b/gensim/models/fasttext_inner.pyx @@ -0,0 +1,246 @@ +#!/usr/bin/env cython +# coding: utf-8 + +import cython +import numpy as np +cimport numpy as np + +from libc.math cimport exp +from libc.math cimport log +from libc.string cimport memset +from libc.stdio cimport printf +from libc.stdlib cimport calloc, free +from gensim import matutils + + +# scipy <= 0.15 +try: + from scipy.linalg.blas import fblas +except ImportError: + # in scipy > 0.15, fblas function has been removed + import scipy.linalg.blas as fblas + +from word2vec_inner cimport bisect_left, random_int32, \ + scopy, saxpy, sdot, dsdot, snrm2, sscal, \ + REAL_t, EXP_TABLE, \ + our_dot, our_saxpy, \ + our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas + +REAL = np.float32 + +DEF MAX_SENTENCE_LEN = 10000 +DEF MAX_SUBWORDS = 1000 +from word2vec import FAST_VERSION + +DEF EXP_TABLE_SIZE = 1000 +DEF MAX_EXP = 6 + +cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE +cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE + +cdef int ONE = 1 +cdef REAL_t ONEF = 1.0 + +cdef unsigned long long fast_sentence_sg_neg( + const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, + REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, + const np.uint32_t word_index, const np.uint32_t *subwords_index, const np.uint32_t subwords_len, + const REAL_t alpha, REAL_t *work, REAL_t *l1, unsigned long long next_random, REAL_t *word_locks_vocab, + REAL_t *word_locks_ngrams) nogil: + + cdef long long a + cdef np.uint32_t word2_index = subwords_index[0] + cdef long long row1 = word2_index * size, row2 + cdef unsigned long long modulo = 281474976710655ULL + cdef REAL_t f, g, label, f_dot, log_e_f_dot + cdef np.uint32_t target_index + cdef int d + + memset(work, 0, size * cython.sizeof(REAL_t)) + memset(l1, 0, size * cython.sizeof(REAL_t)) + # cdef REAL_t *l1 + # l1 = calloc(size, cython.sizeof(REAL_t)) + scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + for d in range(1, subwords_len): + subword_row = subwords_index[d] * size + our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + cdef REAL_t norm_factor = ONEF / subwords_len + sscal(&size, &norm_factor, l1 , &ONE) + + for d in range(negative+1): + if d == 0: + target_index = word_index + label = ONEF + else: + target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + next_random = (next_random * 25214903917ULL + 11) & modulo + if target_index == word_index: + continue + label = 0.0 + + row2 = target_index * size + f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) + if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + continue + f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (label - f) * alpha + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) + our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + for d in range(1, subwords_len): + our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + return next_random + + +def train_batch_sg(model, sentences, alpha, _work, _l1): + cdef int hs = model.hs + cdef int negative = model.negative + cdef int sample = (model.sample != 0) + + cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + + cdef REAL_t *work + cdef REAL_t *l1 + + cdef REAL_t _alpha = alpha + cdef int size = model.layer1_size + + cdef int codelens[MAX_SENTENCE_LEN] + cdef np.uint32_t indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] + cdef int sentence_idx[MAX_SENTENCE_LEN + 1] + cdef int window = model.window + + cdef int i, j, k + cdef int effective_words = 0, effective_sentences = 0 + cdef int sent_idx, idx_start, idx_end + + # For negative sampling + cdef REAL_t *syn1neg + cdef np.uint32_t *cum_table + cdef unsigned long long cum_table_len + # for sampling (negative and frequent-word downsampling) + cdef unsigned long long next_random + + # For passing subwords information as C objects for nogil + cdef np.uint32_t subwords_idx_len[MAX_SENTENCE_LEN] + cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + + + if negative: + syn1neg = (np.PyArray_DATA(model.syn1neg)) + cum_table = (np.PyArray_DATA(model.cum_table)) + cum_table_len = len(model.cum_table) + if negative or sample: + next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + + # convert Python structures to primitive types, so we can release the GIL + work = np.PyArray_DATA(_work) + l1 = np.PyArray_DATA(_l1) + # l1 = calloc(size, cython.sizeof(REAL_t)) + + # prepare C structures so we can go "full C" and release the Python GIL + vlookup = model.wv.vocab + sentence_idx[0] = 0 # indices of the first sentence always start at 0 + for sent in sentences: + if not sent: + continue # ignore empty sentences; leave effective_sentences unchanged + for token in sent: + word = vlookup[token] if token in vlookup else None + if word is None: + continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + if sample and word.sample_int < random_int32(&next_random): + continue + indexes[effective_words] = word.index + + effective_words += 1 + if effective_words == MAX_SENTENCE_LEN: + break # TODO: log warning, tally overflow? + + # keep track of which words go into which sentence, so we don't train + # across sentence boundaries. + # indices of sentence number X are between = effective_words: + break + word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] + subwords_idx_len[i] = len(word_subwords) + 1 + subwords_idx[i][0] = word_idx + for j, subword in enumerate(word_subwords): + subwords_idx[i][j+1] = model.wv.ngrams[subword] + + with nogil: + for sent_idx in range(effective_sentences): + idx_start = sentence_idx[sent_idx] + idx_end = sentence_idx[sent_idx + 1] + for i in range(idx_start, idx_end): + j = i - window + reduced_windows[i] + if j < idx_start: + j = idx_start + k = i + window + 1 - reduced_windows[i] + if k > idx_end: + k = idx_end + for j in range(j, k): + if j == i: + continue + if negative: + next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + + return effective_words + + +def init(): + """ + Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized + into table EXP_TABLE. Also calculate log(sigmoid(x)) into LOG_TABLE. + """ + global our_dot + global our_saxpy + + cdef int i + cdef float *x = [10.0] + cdef float *y = [0.01] + cdef float expected = 0.1 + cdef int size = 1 + cdef double d_res + cdef float *p_res + + # build the sigmoid table + for i in range(EXP_TABLE_SIZE): + EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) + EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) + LOG_TABLE[i] = log( EXP_TABLE[i] ) + + # check whether sdot returns double or float + d_res = dsdot(&size, x, &ONE, y, &ONE) + p_res = &d_res + if (abs(d_res - expected) < 0.0001): + our_dot = our_dot_double + our_saxpy = saxpy + return 0 # double + elif (abs(p_res[0] - expected) < 0.0001): + our_dot = our_dot_float + our_saxpy = saxpy + return 1 # float + else: + # neither => use cython loops, no BLAS + # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here + our_dot = our_dot_noblas + our_saxpy = our_saxpy_noblas + return 2 + +FAST_VERSION = init() # initialize the module +MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN \ No newline at end of file From da45be330712440619652bfc358d91431248a31e Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 27 Nov 2017 03:15:22 +0530 Subject: [PATCH 03/22] loop over indexes using index iterator --- gensim/models/fasttext_inner.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 252a3958e5..f92ed074e5 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -173,9 +173,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): for i, item in enumerate(model.random.randint(0, window, effective_words)): reduced_windows[i] = item - for i, word_idx in enumerate(indexes): - if i >= effective_words: - break + for i in range(effective_words): + word_idx = indexes[i] word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] subwords_idx_len[i] = len(word_subwords) + 1 subwords_idx[i][0] = word_idx From a8df8e116689b4afde384eacb94d12f8f531b736 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 27 Nov 2017 11:16:47 +0530 Subject: [PATCH 04/22] cythonizes training using skipgram with hierarchical softmax --- gensim/models/fasttext_inner.pyx | 55 +++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index f92ed074e5..9dc6f34ec2 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -44,7 +44,7 @@ cdef REAL_t ONEF = 1.0 cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, - const np.uint32_t word_index, const np.uint32_t *subwords_index, const np.uint32_t subwords_len, + const np.uint32_t word_index, np.uint32_t *subwords_index, const np.uint32_t subwords_len, const REAL_t alpha, REAL_t *work, REAL_t *l1, unsigned long long next_random, REAL_t *word_locks_vocab, REAL_t *word_locks_ngrams) nogil: @@ -58,8 +58,7 @@ cdef unsigned long long fast_sentence_sg_neg( memset(work, 0, size * cython.sizeof(REAL_t)) memset(l1, 0, size * cython.sizeof(REAL_t)) - # cdef REAL_t *l1 - # l1 = calloc(size, cython.sizeof(REAL_t)) + scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) for d in range(1, subwords_len): subword_row = subwords_index[d] * size @@ -91,6 +90,42 @@ cdef unsigned long long fast_sentence_sg_neg( our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) return next_random +cdef void fast_sentence_sg_hs( + const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, + REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + np.uint32_t *subwords_index, const np.uint32_t subwords_len, + const REAL_t alpha, REAL_t *work, REAL_t *l1, REAL_t *word_locks_vocab, + REAL_t *word_locks_ngrams) nogil: + + cdef long long a, b + cdef np.uint32_t word2_index = subwords_index[0] + cdef long long row1 = word2_index * size, row2, sgn + cdef REAL_t f, g, f_dot, lprob + + memset(work, 0, size * cython.sizeof(REAL_t)) + memset(l1, 0, size * cython.sizeof(REAL_t)) + + scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + for d in range(1, subwords_len): + subword_row = subwords_index[d] * size + our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + cdef REAL_t norm_factor = ONEF / subwords_len + sscal(&size, &norm_factor, l1 , &ONE) + + for b in range(codelen): + row2 = word_point[b] * size + f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + continue + f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (1 - word_code[b] - f) * alpha + + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) + + our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + for d in range(1, subwords_len): + our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) def train_batch_sg(model, sentences, alpha, _work, _l1): cdef int hs = model.hs @@ -118,6 +153,11 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): cdef int effective_words = 0, effective_sentences = 0 cdef int sent_idx, idx_start, idx_end + # For hierarchical softmax + cdef REAL_t *syn1 + cdef np.uint32_t *points[MAX_SENTENCE_LEN] + cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + # For negative sampling cdef REAL_t *syn1neg cdef np.uint32_t *cum_table @@ -129,6 +169,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): cdef np.uint32_t subwords_idx_len[MAX_SENTENCE_LEN] cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + if hs: + syn1 = (np.PyArray_DATA(model.syn1)) if negative: syn1neg = (np.PyArray_DATA(model.syn1neg)) @@ -140,7 +182,6 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): # convert Python structures to primitive types, so we can release the GIL work = np.PyArray_DATA(_work) l1 = np.PyArray_DATA(_l1) - # l1 = calloc(size, cython.sizeof(REAL_t)) # prepare C structures so we can go "full C" and release the Python GIL vlookup = model.wv.vocab @@ -155,6 +196,10 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): if sample and word.sample_int < random_int32(&next_random): continue indexes[effective_words] = word.index + if hs: + codelens[effective_words] = len(word.code) + codes[effective_words] = np.PyArray_DATA(word.code) + points[effective_words] = np.PyArray_DATA(word.point) effective_words += 1 if effective_words == MAX_SENTENCE_LEN: @@ -195,6 +240,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): for j in range(j, k): if j == i: continue + if hs: + fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) if negative: next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) From 1e51a92b4c515471e3acd2262b99eb0935166544 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 27 Nov 2017 15:11:05 +0530 Subject: [PATCH 05/22] adds cython generated .c file --- gensim/models/fasttext_inner.c | 10855 +++++++++++++++++++++++++++++++ 1 file changed, 10855 insertions(+) create mode 100644 gensim/models/fasttext_inner.c diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c new file mode 100644 index 0000000000..9f90c6296d --- /dev/null +++ b/gensim/models/fasttext_inner.c @@ -0,0 +1,10855 @@ +/* Generated by Cython 0.27.3 */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.6+ or Python 3.3+. +#else +#define CYTHON_ABI "0_27_3" +#define CYTHON_FUTURE_DIVISION 0 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #if PY_VERSION_HEX >= 0x02070000 + #define HAVE_LONG_LONG + #endif +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#ifdef PYPY_VERSION + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 +#elif defined(PYSTON_VERSION) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #elif !defined(CYTHON_USE_PYTYPE_LOOKUP) + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #if PY_VERSION_HEX < 0x02070000 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #elif !defined(CYTHON_USE_PYLONG_INTERNALS) + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT (0 && PY_VERSION_HEX >= 0x03050000) + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1) + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #undef SHIFT + #undef BASE + #undef MASK +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) + #define Py_OptimizeFlag 0 +#endif +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyClass_Type +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) + #define __Pyx_DefaultClassType PyType_Type +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#if PY_VERSION_HEX < 0x030700A0 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject **args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject **args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #define __Pyx_PyCFunctionFast _PyCFunctionFast + #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords +#endif +#if CYTHON_FAST_PYCCALL +#define __Pyx_PyFastCFunction_Check(func)\ + ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS))))) +#else +#define __Pyx_PyFastCFunction_Check(func) 0 +#endif +#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_PYSTON + #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t PyInt_AsLong +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) +#else + #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + #endif + #endif +#else + #include +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) && __cplusplus >= 201103L + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #elif __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__ ) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#if defined(WIN32) || defined(MS_WINDOWS) + #define _USE_MATH_DEFINES +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + + +#define __PYX_ERR(f_index, lineno, Ln_error) \ +{ \ + __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \ +} + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__gensim__models__fasttext_inner +#define __PYX_HAVE_API__gensim__models__fasttext_inner +#include +#include +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include +#include +#include "voidptr.h" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) { + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +static PyObject *__pyx_m = NULL; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_cython_runtime; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static PyObject *__pyx_empty_unicode; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif defined(_Complex_I) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include + #else + #include + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define _Complex_I 1.0fj +#endif + + +static const char *__pyx_f[] = { + "fasttext_inner.pyx", + "__init__.pxd", + "type.pxd", +}; +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* NoFastGil.proto */ +#define __Pyx_PyGILState_Ensure PyGILState_Ensure +#define __Pyx_PyGILState_Release PyGILState_Release +#define __Pyx_FastGIL_Remember() +#define __Pyx_FastGIL_Forget() +#define __Pyx_FastGilFuncInit() + + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 + * # in Cython to enable them only on the right systems. + * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":744 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":745 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":746 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":753 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":757 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":768 + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_longlong __pyx_t_5numpy_long_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 + * ctypedef npy_longlong longlong_t + * + * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulonglong __pyx_t_5numpy_ulong_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":776 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; + +/* "word2vec_inner.pxd":12 + * + * cimport numpy as np + * ctypedef np.float32_t REAL_t # <<<<<<<<<<<<<< + * + * # BLAS routine signatures + */ +typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t; +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + + +/*--- Type declarations ---*/ + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * ctypedef npy_longdouble longdouble_t + * + * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef npy_cdouble __pyx_t_5numpy_complex_t; + +/* "word2vec_inner.pxd":15 + * + * # BLAS routine signatures + * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil + * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + */ +typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr)(int const *, float const *, int const *, float *, int const *); + +/* "word2vec_inner.pxd":16 + * # BLAS routine signatures + * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil + * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + */ +typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); + +/* "word2vec_inner.pxd":17 + * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil + * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil + * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil + */ +typedef float (*__pyx_t_6gensim_6models_14word2vec_inner_sdot_ptr)(int const *, float const *, int const *, float const *, int const *); + +/* "word2vec_inner.pxd":18 + * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil + * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil + * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil + */ +typedef double (*__pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr)(int const *, float const *, int const *, float const *, int const *); + +/* "word2vec_inner.pxd":19 + * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< + * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil + * + */ +typedef double (*__pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr)(int const *, float const *, int const *); + +/* "word2vec_inner.pxd":20 + * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil + * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< + * + * cdef scopy_ptr scopy + */ +typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr)(int const *, float const *, float const *, int const *); + +/* "word2vec_inner.pxd":35 + * + * # function implementations swapped based on BLAS detected in word2vec_inner.pyx init() + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil + * + */ +typedef __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr)(int const *, float const *, int const *, float const *, int const *); + +/* "word2vec_inner.pxd":36 + * # function implementations swapped based on BLAS detected in word2vec_inner.pyx init() + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * + * cdef our_dot_ptr our_dot + */ +typedef void (*__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* WriteUnraisableException.proto */ +static void __Pyx_WriteUnraisable(const char *name, int clineno, + int lineno, const char *filename, + int full_traceback, int nogil); + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* PySequenceContains.proto */ +static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) { + int result = PySequence_Contains(seq, item); + return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); +} + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, int nargs, PyObject *kwargs); +#else +#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs) +#endif +#endif + +/* PyCFunctionFastCall.proto */ +#if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs); +#else +#define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) +#endif + +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* DictGetItem.proto */ +#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY +static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) { + PyObject *value; + value = PyDict_GetItemWithError(d, key); + if (unlikely(!value)) { + if (!PyErr_Occurred()) { + PyObject* args = PyTuple_Pack(1, key); + if (likely(args)) + PyErr_SetObject(PyExc_KeyError, args); + Py_XDECREF(args); + } + return NULL; + } + Py_INCREF(value); + return value; +} +#else + #define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key) +#endif + +/* RaiseTooManyValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); + +/* RaiseNeedMoreValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); + +/* RaiseNoneIterError.proto */ +static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* ImportFrom.proto */ +static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); + +/* GetModuleGlobalName.proto */ +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* PyObjectCallNoArg.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); +#else +#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL) +#endif + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_npy_uint32(npy_uint32 value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value); + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) ((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif + +/* CheckBinaryVersion.proto */ +static int __Pyx_check_binary_version(void); + +/* PyIdentifierFromString.proto */ +#if !defined(__Pyx_PyIdentifier_FromString) +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s) +#else + #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s) +#endif +#endif + +/* ModuleImport.proto */ +static PyObject *__Pyx_ImportModule(const char *name); + +/* TypeImport.proto */ +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict); + +/* VoidPtrImport.proto */ +static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig); + +/* FunctionImport.proto */ +static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'cython' */ + +/* Module declarations from 'cpython.buffer' */ + +/* Module declarations from 'libc.string' */ + +/* Module declarations from 'libc.stdio' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.type' */ +static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0; + +/* Module declarations from 'cpython' */ + +/* Module declarations from 'cpython.object' */ + +/* Module declarations from 'cpython.ref' */ + +/* Module declarations from 'cpython.mem' */ + +/* Module declarations from 'numpy' */ + +/* Module declarations from 'numpy' */ +static PyTypeObject *__pyx_ptype_5numpy_dtype = 0; +static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0; +static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0; +static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; +static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; +static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/ + +/* Module declarations from 'libc.math' */ + +/* Module declarations from 'libc.stdlib' */ + +/* Module declarations from 'gensim.models.word2vec_inner' */ +static __pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_scopy = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_scopy (*__pyx_vp_6gensim_6models_14word2vec_inner_scopy) +static __pyx_t_6gensim_6models_14word2vec_inner_saxpy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_saxpy = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_saxpy (*__pyx_vp_6gensim_6models_14word2vec_inner_saxpy) +static __pyx_t_6gensim_6models_14word2vec_inner_sdot_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_sdot = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_sdot (*__pyx_vp_6gensim_6models_14word2vec_inner_sdot) +static __pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_dsdot = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_dsdot (*__pyx_vp_6gensim_6models_14word2vec_inner_dsdot) +static __pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_snrm2 = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_snrm2 (*__pyx_vp_6gensim_6models_14word2vec_inner_snrm2) +static __pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_sscal = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_sscal (*__pyx_vp_6gensim_6models_14word2vec_inner_sscal) +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_vp_6gensim_6models_14word2vec_inner_EXP_TABLE)[0x3E8] = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE (*__pyx_vp_6gensim_6models_14word2vec_inner_EXP_TABLE) +static __pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_our_dot = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_our_dot (*__pyx_vp_6gensim_6models_14word2vec_inner_our_dot) +static __pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_our_saxpy = 0; +#define __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy (*__pyx_vp_6gensim_6models_14word2vec_inner_our_saxpy) +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_double)(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_float)(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas)(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static void (*__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas)(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ +static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_bisect_left)(__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG); /*proto*/ +static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_random_int32)(unsigned PY_LONG_LONG *); /*proto*/ + +/* Module declarations from 'gensim.models.fasttext_inner' */ +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[0x3E8]; +static int __pyx_v_6gensim_6models_14fasttext_inner_ONE; +static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14fasttext_inner_ONEF; +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +#define __Pyx_MODULE_NAME "gensim.models.fasttext_inner" +extern int __pyx_module_is_main_gensim__models__fasttext_inner; +int __pyx_module_is_main_gensim__models__fasttext_inner = 0; + +/* Implementation of 'gensim.models.fasttext_inner' */ +static PyObject *__pyx_builtin_ImportError; +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_enumerate; +static PyObject *__pyx_builtin_ValueError; +static PyObject *__pyx_builtin_RuntimeError; +static const char __pyx_k_i[] = "i"; +static const char __pyx_k_j[] = "j"; +static const char __pyx_k_k[] = "k"; +static const char __pyx_k_x[] = "x"; +static const char __pyx_k_y[] = "y"; +static const char __pyx_k_hs[] = "hs"; +static const char __pyx_k_l1[] = "_l1"; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_wv[] = "wv"; +static const char __pyx_k__12[] = "*"; +static const char __pyx_k_REAL[] = "REAL"; +static const char __pyx_k_code[] = "code"; +static const char __pyx_k_init[] = "init"; +static const char __pyx_k_item[] = "item"; +static const char __pyx_k_l1_2[] = "l1"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_sent[] = "sent"; +static const char __pyx_k_size[] = "size"; +static const char __pyx_k_syn1[] = "syn1"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_word[] = "word"; +static const char __pyx_k_work[] = "_work"; +static const char __pyx_k_alpha[] = "alpha"; +static const char __pyx_k_codes[] = "codes"; +static const char __pyx_k_d_res[] = "d_res"; +static const char __pyx_k_fblas[] = "fblas"; +static const char __pyx_k_index[] = "index"; +static const char __pyx_k_model[] = "model"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_p_res[] = "p_res"; +static const char __pyx_k_point[] = "point"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_token[] = "token"; +static const char __pyx_k_vocab[] = "vocab"; +static const char __pyx_k_gensim[] = "gensim"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_ngrams[] = "ngrams"; +static const char __pyx_k_points[] = "points"; +static const char __pyx_k_random[] = "random"; +static const char __pyx_k_sample[] = "sample"; +static const char __pyx_k_window[] = "window"; +static const char __pyx_k_work_2[] = "work"; +static const char __pyx_k_alpha_2[] = "_alpha"; +static const char __pyx_k_float32[] = "float32"; +static const char __pyx_k_idx_end[] = "idx_end"; +static const char __pyx_k_indexes[] = "indexes"; +static const char __pyx_k_randint[] = "randint"; +static const char __pyx_k_subword[] = "subword"; +static const char __pyx_k_syn1neg[] = "syn1neg"; +static const char __pyx_k_vlookup[] = "vlookup"; +static const char __pyx_k_codelens[] = "codelens"; +static const char __pyx_k_expected[] = "expected"; +static const char __pyx_k_matutils[] = "matutils"; +static const char __pyx_k_negative[] = "negative"; +static const char __pyx_k_sent_idx[] = "sent_idx"; +static const char __pyx_k_word2vec[] = "word2vec"; +static const char __pyx_k_word_idx[] = "word_idx"; +static const char __pyx_k_cum_table[] = "cum_table"; +static const char __pyx_k_enumerate[] = "enumerate"; +static const char __pyx_k_idx_start[] = "idx_start"; +static const char __pyx_k_sentences[] = "sentences"; +static const char __pyx_k_ValueError[] = "ValueError"; +static const char __pyx_k_index2word[] = "index2word"; +static const char __pyx_k_sample_int[] = "sample_int"; +static const char __pyx_k_syn0_vocab[] = "syn0_vocab"; +static const char __pyx_k_ImportError[] = "ImportError"; +static const char __pyx_k_layer1_size[] = "layer1_size"; +static const char __pyx_k_next_random[] = "next_random"; +static const char __pyx_k_ngrams_word[] = "ngrams_word"; +static const char __pyx_k_syn0_ngrams[] = "syn0_ngrams"; +static const char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; +static const char __pyx_k_RuntimeError[] = "RuntimeError"; +static const char __pyx_k_sentence_idx[] = "sentence_idx"; +static const char __pyx_k_subwords_idx[] = "subwords_idx"; +static const char __pyx_k_cum_table_len[] = "cum_table_len"; +static const char __pyx_k_word_subwords[] = "word_subwords"; +static const char __pyx_k_train_batch_sg[] = "train_batch_sg"; +static const char __pyx_k_effective_words[] = "effective_words"; +static const char __pyx_k_reduced_windows[] = "reduced_windows"; +static const char __pyx_k_subwords_idx_len[] = "subwords_idx_len"; +static const char __pyx_k_syn0_vocab_lockf[] = "syn0_vocab_lockf"; +static const char __pyx_k_word_locks_vocab[] = "word_locks_vocab"; +static const char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; +static const char __pyx_k_syn0_ngrams_lockf[] = "syn0_ngrams_lockf"; +static const char __pyx_k_word_locks_ngrams[] = "word_locks_ngrams"; +static const char __pyx_k_MAX_WORDS_IN_BATCH[] = "MAX_WORDS_IN_BATCH"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_fasttext_inner_pyx[] = "fasttext_inner.pyx"; +static const char __pyx_k_effective_sentences[] = "effective_sentences"; +static const char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; +static const char __pyx_k_gensim_models_fasttext_inner[] = "gensim.models.fasttext_inner"; +static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; +static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; +static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; +static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; +static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous"; +static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; +static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short."; +static PyObject *__pyx_n_s_FAST_VERSION; +static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; +static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2; +static PyObject *__pyx_n_s_ImportError; +static PyObject *__pyx_n_s_MAX_WORDS_IN_BATCH; +static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; +static PyObject *__pyx_n_s_REAL; +static PyObject *__pyx_n_s_RuntimeError; +static PyObject *__pyx_n_s_ValueError; +static PyObject *__pyx_n_s__12; +static PyObject *__pyx_n_s_alpha; +static PyObject *__pyx_n_s_alpha_2; +static PyObject *__pyx_n_s_cline_in_traceback; +static PyObject *__pyx_n_s_code; +static PyObject *__pyx_n_s_codelens; +static PyObject *__pyx_n_s_codes; +static PyObject *__pyx_n_s_cum_table; +static PyObject *__pyx_n_s_cum_table_len; +static PyObject *__pyx_n_s_d_res; +static PyObject *__pyx_n_s_effective_sentences; +static PyObject *__pyx_n_s_effective_words; +static PyObject *__pyx_n_s_enumerate; +static PyObject *__pyx_n_s_expected; +static PyObject *__pyx_kp_s_fasttext_inner_pyx; +static PyObject *__pyx_n_s_fblas; +static PyObject *__pyx_n_s_float32; +static PyObject *__pyx_n_s_gensim; +static PyObject *__pyx_n_s_gensim_models_fasttext_inner; +static PyObject *__pyx_n_s_hs; +static PyObject *__pyx_n_s_i; +static PyObject *__pyx_n_s_idx_end; +static PyObject *__pyx_n_s_idx_start; +static PyObject *__pyx_n_s_import; +static PyObject *__pyx_n_s_index; +static PyObject *__pyx_n_s_index2word; +static PyObject *__pyx_n_s_indexes; +static PyObject *__pyx_n_s_init; +static PyObject *__pyx_n_s_item; +static PyObject *__pyx_n_s_j; +static PyObject *__pyx_n_s_k; +static PyObject *__pyx_n_s_l1; +static PyObject *__pyx_n_s_l1_2; +static PyObject *__pyx_n_s_layer1_size; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_n_s_matutils; +static PyObject *__pyx_n_s_model; +static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; +static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; +static PyObject *__pyx_n_s_negative; +static PyObject *__pyx_n_s_next_random; +static PyObject *__pyx_n_s_ngrams; +static PyObject *__pyx_n_s_ngrams_word; +static PyObject *__pyx_n_s_np; +static PyObject *__pyx_n_s_numpy; +static PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to; +static PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; +static PyObject *__pyx_n_s_p_res; +static PyObject *__pyx_n_s_point; +static PyObject *__pyx_n_s_points; +static PyObject *__pyx_n_s_randint; +static PyObject *__pyx_n_s_random; +static PyObject *__pyx_n_s_range; +static PyObject *__pyx_n_s_reduced_windows; +static PyObject *__pyx_n_s_sample; +static PyObject *__pyx_n_s_sample_int; +static PyObject *__pyx_n_s_scipy_linalg_blas; +static PyObject *__pyx_n_s_sent; +static PyObject *__pyx_n_s_sent_idx; +static PyObject *__pyx_n_s_sentence_idx; +static PyObject *__pyx_n_s_sentences; +static PyObject *__pyx_n_s_size; +static PyObject *__pyx_n_s_subword; +static PyObject *__pyx_n_s_subwords_idx; +static PyObject *__pyx_n_s_subwords_idx_len; +static PyObject *__pyx_n_s_syn0_ngrams; +static PyObject *__pyx_n_s_syn0_ngrams_lockf; +static PyObject *__pyx_n_s_syn0_vocab; +static PyObject *__pyx_n_s_syn0_vocab_lockf; +static PyObject *__pyx_n_s_syn1; +static PyObject *__pyx_n_s_syn1neg; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_token; +static PyObject *__pyx_n_s_train_batch_sg; +static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; +static PyObject *__pyx_n_s_vlookup; +static PyObject *__pyx_n_s_vocab; +static PyObject *__pyx_n_s_window; +static PyObject *__pyx_n_s_word; +static PyObject *__pyx_n_s_word2vec; +static PyObject *__pyx_n_s_word_idx; +static PyObject *__pyx_n_s_word_locks_ngrams; +static PyObject *__pyx_n_s_word_locks_vocab; +static PyObject *__pyx_n_s_word_subwords; +static PyObject *__pyx_n_s_work; +static PyObject *__pyx_n_s_work_2; +static PyObject *__pyx_n_s_wv; +static PyObject *__pyx_n_s_x; +static PyObject *__pyx_n_s_y; +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__l1); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ +static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ +static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ +static PyObject *__pyx_int_0; +static PyObject *__pyx_int_1; +static PyObject *__pyx_int_2; +static PyObject *__pyx_int_10000; +static PyObject *__pyx_int_16777216; +static PyObject *__pyx_tuple_; +static PyObject *__pyx_tuple__2; +static PyObject *__pyx_tuple__3; +static PyObject *__pyx_tuple__4; +static PyObject *__pyx_tuple__5; +static PyObject *__pyx_tuple__6; +static PyObject *__pyx_tuple__7; +static PyObject *__pyx_tuple__8; +static PyObject *__pyx_tuple__9; +static PyObject *__pyx_tuple__10; +static PyObject *__pyx_tuple__11; +static PyObject *__pyx_tuple__13; +static PyObject *__pyx_tuple__15; +static PyObject *__pyx_codeobj__14; +static PyObject *__pyx_codeobj__16; + +/* "gensim/models/fasttext_inner.pyx":44 + * cdef REAL_t ONEF = 1.0 + * + * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, + * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, + */ + +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { + __pyx_t_5numpy_uint32_t __pyx_v_word2_index; + PY_LONG_LONG __pyx_v_row1; + PY_LONG_LONG __pyx_v_row2; + unsigned PY_LONG_LONG __pyx_v_modulo; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; + __pyx_t_5numpy_uint32_t __pyx_v_target_index; + int __pyx_v_d; + __pyx_t_5numpy_uint32_t __pyx_v_subword_row; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_norm_factor; + unsigned PY_LONG_LONG __pyx_r; + __pyx_t_5numpy_uint32_t __pyx_t_1; + int __pyx_t_2; + long __pyx_t_3; + int __pyx_t_4; + unsigned PY_LONG_LONG __pyx_t_5; + int __pyx_t_6; + + /* "gensim/models/fasttext_inner.pyx":52 + * + * cdef long long a + * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< + * cdef long long row1 = word2_index * size, row2 + * cdef unsigned long long modulo = 281474976710655ULL + */ + __pyx_v_word2_index = (__pyx_v_subwords_index[0]); + + /* "gensim/models/fasttext_inner.pyx":53 + * cdef long long a + * cdef np.uint32_t word2_index = subwords_index[0] + * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< + * cdef unsigned long long modulo = 281474976710655ULL + * cdef REAL_t f, g, label, f_dot, log_e_f_dot + */ + __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":54 + * cdef np.uint32_t word2_index = subwords_index[0] + * cdef long long row1 = word2_index * size, row2 + * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< + * cdef REAL_t f, g, label, f_dot, log_e_f_dot + * cdef np.uint32_t target_index + */ + __pyx_v_modulo = 281474976710655ULL; + + /* "gensim/models/fasttext_inner.pyx":59 + * cdef int d + * + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * memset(l1, 0, size * cython.sizeof(REAL_t)) + * + */ + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":60 + * + * memset(work, 0, size * cython.sizeof(REAL_t)) + * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + */ + memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":62 + * memset(l1, 0, size * cython.sizeof(REAL_t)) + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size + */ + __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":63 + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + * for d in range(1, subwords_len): # <<<<<<<<<<<<<< + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + */ + __pyx_t_1 = __pyx_v_subwords_len; + for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":64 + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len + */ + __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":65 + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * cdef REAL_t norm_factor = ONEF / subwords_len + * sscal(&size, &norm_factor, l1 , &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + + /* "gensim/models/fasttext_inner.pyx":66 + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< + * sscal(&size, &norm_factor, l1 , &ONE) + * + */ + if (unlikely(__pyx_v_subwords_len == 0)) { + #ifdef WITH_THREAD + PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); + #endif + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + #ifdef WITH_THREAD + __Pyx_PyGILState_Release(__pyx_gilstate_save); + #endif + __PYX_ERR(0, 66, __pyx_L1_error) + } + __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); + + /* "gensim/models/fasttext_inner.pyx":67 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len + * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< + * + * for d in range(negative+1): + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":69 + * sscal(&size, &norm_factor, l1 , &ONE) + * + * for d in range(negative+1): # <<<<<<<<<<<<<< + * if d == 0: + * target_index = word_index + */ + __pyx_t_3 = (__pyx_v_negative + 1); + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":70 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF + */ + __pyx_t_4 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_4) { + + /* "gensim/models/fasttext_inner.pyx":71 + * for d in range(negative+1): + * if d == 0: + * target_index = word_index # <<<<<<<<<<<<<< + * label = ONEF + * else: + */ + __pyx_v_target_index = __pyx_v_word_index; + + /* "gensim/models/fasttext_inner.pyx":72 + * if d == 0: + * target_index = word_index + * label = ONEF # <<<<<<<<<<<<<< + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + */ + __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; + + /* "gensim/models/fasttext_inner.pyx":70 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF + */ + goto __pyx_L7; + } + + /* "gensim/models/fasttext_inner.pyx":74 + * label = ONEF + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + */ + /*else*/ { + __pyx_t_5 = (__pyx_v_next_random >> 16); + __pyx_t_1 = (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)]); + if (unlikely(__pyx_t_1 == 0)) { + #ifdef WITH_THREAD + PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); + #endif + PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); + #ifdef WITH_THREAD + __Pyx_PyGILState_Release(__pyx_gilstate_save); + #endif + __PYX_ERR(0, 74, __pyx_L1_error) + } + __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, (__pyx_t_5 % __pyx_t_1), 0, __pyx_v_cum_table_len); + + /* "gensim/models/fasttext_inner.pyx":75 + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == word_index: + * continue + */ + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); + + /* "gensim/models/fasttext_inner.pyx":76 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_4) { + + /* "gensim/models/fasttext_inner.pyx":77 + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + * continue # <<<<<<<<<<<<<< + * label = 0.0 + * + */ + goto __pyx_L5_continue; + + /* "gensim/models/fasttext_inner.pyx":76 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + } + + /* "gensim/models/fasttext_inner.pyx":78 + * if target_index == word_index: + * continue + * label = 0.0 # <<<<<<<<<<<<<< + * + * row2 = target_index * size + */ + __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + } + __pyx_L7:; + + /* "gensim/models/fasttext_inner.pyx":80 + * label = 0.0 + * + * row2 = target_index * size # <<<<<<<<<<<<<< + * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + */ + __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":81 + * + * row2 = target_index * size + * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + */ + __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":82 + * row2 = target_index * size + * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_6 = ((__pyx_v_f_dot <= -6.0) != 0); + if (!__pyx_t_6) { + } else { + __pyx_t_4 = __pyx_t_6; + goto __pyx_L10_bool_binop_done; + } + __pyx_t_6 = ((__pyx_v_f_dot >= 6.0) != 0); + __pyx_t_4 = __pyx_t_6; + __pyx_L10_bool_binop_done:; + if (__pyx_t_4) { + + /* "gensim/models/fasttext_inner.pyx":83 + * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha + */ + goto __pyx_L5_continue; + + /* "gensim/models/fasttext_inner.pyx":82 + * row2 = target_index * size + * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + } + + /* "gensim/models/fasttext_inner.pyx":84 + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (label - f) * alpha + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + */ + __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); + + /* "gensim/models/fasttext_inner.pyx":85 + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) + */ + __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); + + /* "gensim/models/fasttext_inner.pyx":86 + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":87 + * g = (label - f) * alpha + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + * for d in range(1, subwords_len): + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_L5_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":88 + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_len): + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":89 + * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + * for d in range(1, subwords_len): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * return next_random + */ + __pyx_t_1 = __pyx_v_subwords_len; + for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":90 + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + * for d in range(1, subwords_len): + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< + * return next_random + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + + /* "gensim/models/fasttext_inner.pyx":91 + * for d in range(1, subwords_len): + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * return next_random # <<<<<<<<<<<<<< + * + * cdef void fast_sentence_sg_hs( + */ + __pyx_r = __pyx_v_next_random; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":44 + * cdef REAL_t ONEF = 1.0 + * + * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, + * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_WriteUnraisable("gensim.models.fasttext_inner.fast_sentence_sg_neg", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1); + __pyx_r = 0; + __pyx_L0:; + return __pyx_r; +} + +/* "gensim/models/fasttext_inner.pyx":93 + * return next_random + * + * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, + * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + */ + +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { + PY_LONG_LONG __pyx_v_b; + __pyx_t_5numpy_uint32_t __pyx_v_word2_index; + PY_LONG_LONG __pyx_v_row1; + PY_LONG_LONG __pyx_v_row2; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; + long __pyx_v_d; + __pyx_t_5numpy_uint32_t __pyx_v_subword_row; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_norm_factor; + __pyx_t_5numpy_uint32_t __pyx_t_1; + long __pyx_t_2; + int __pyx_t_3; + PY_LONG_LONG __pyx_t_4; + int __pyx_t_5; + int __pyx_t_6; + + /* "gensim/models/fasttext_inner.pyx":101 + * + * cdef long long a, b + * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< + * cdef long long row1 = word2_index * size, row2, sgn + * cdef REAL_t f, g, f_dot, lprob + */ + __pyx_v_word2_index = (__pyx_v_subwords_index[0]); + + /* "gensim/models/fasttext_inner.pyx":102 + * cdef long long a, b + * cdef np.uint32_t word2_index = subwords_index[0] + * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< + * cdef REAL_t f, g, f_dot, lprob + * + */ + __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":105 + * cdef REAL_t f, g, f_dot, lprob + * + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * memset(l1, 0, size * cython.sizeof(REAL_t)) + * + */ + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":106 + * + * memset(work, 0, size * cython.sizeof(REAL_t)) + * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + */ + memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":108 + * memset(l1, 0, size * cython.sizeof(REAL_t)) + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size + */ + __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":109 + * + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + * for d in range(1, subwords_len): # <<<<<<<<<<<<<< + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + */ + __pyx_t_1 = __pyx_v_subwords_len; + for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":110 + * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len + */ + __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":111 + * for d in range(1, subwords_len): + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * cdef REAL_t norm_factor = ONEF / subwords_len + * sscal(&size, &norm_factor, l1 , &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + + /* "gensim/models/fasttext_inner.pyx":112 + * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< + * sscal(&size, &norm_factor, l1 , &ONE) + * + */ + if (unlikely(__pyx_v_subwords_len == 0)) { + #ifdef WITH_THREAD + PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); + #endif + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + #ifdef WITH_THREAD + __Pyx_PyGILState_Release(__pyx_gilstate_save); + #endif + __PYX_ERR(0, 112, __pyx_L1_error) + } + __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); + + /* "gensim/models/fasttext_inner.pyx":113 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len + * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< + * + * for b in range(codelen): + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":115 + * sscal(&size, &norm_factor, l1 , &ONE) + * + * for b in range(codelen): # <<<<<<<<<<<<<< + * row2 = word_point[b] * size + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + */ + __pyx_t_3 = __pyx_v_codelen; + for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { + __pyx_v_b = __pyx_t_4; + + /* "gensim/models/fasttext_inner.pyx":116 + * + * for b in range(codelen): + * row2 = word_point[b] * size # <<<<<<<<<<<<<< + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + */ + __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":117 + * for b in range(codelen): + * row2 = word_point[b] * size + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + */ + __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":118 + * row2 = word_point[b] * size + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_6 = ((__pyx_v_f_dot <= -6.0) != 0); + if (!__pyx_t_6) { + } else { + __pyx_t_5 = __pyx_t_6; + goto __pyx_L8_bool_binop_done; + } + __pyx_t_6 = ((__pyx_v_f_dot >= 6.0) != 0); + __pyx_t_5 = __pyx_t_6; + __pyx_L8_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":119 + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha + */ + goto __pyx_L5_continue; + + /* "gensim/models/fasttext_inner.pyx":118 + * row2 = word_point[b] * size + * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + } + + /* "gensim/models/fasttext_inner.pyx":120 + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (1 - word_code[b] - f) * alpha + * + */ + __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); + + /* "gensim/models/fasttext_inner.pyx":121 + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + */ + __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); + + /* "gensim/models/fasttext_inner.pyx":123 + * g = (1 - word_code[b] - f) * alpha + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":124 + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_L5_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":126 + * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) + * + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_len): + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":127 + * + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + * for d in range(1, subwords_len): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * + */ + __pyx_t_1 = __pyx_v_subwords_len; + for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":128 + * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) + * for d in range(1, subwords_len): + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + + /* "gensim/models/fasttext_inner.pyx":93 + * return next_random + * + * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, + * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + */ + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_WriteUnraisable("gensim.models.fasttext_inner.fast_sentence_sg_hs", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1); + __pyx_L0:; +} + +/* "gensim/models/fasttext_inner.pyx":130 + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg = {"train_batch_sg", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_model = 0; + PyObject *__pyx_v_sentences = 0; + PyObject *__pyx_v_alpha = 0; + PyObject *__pyx_v__work = 0; + PyObject *__pyx_v__l1 = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("train_batch_sg (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentences,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_l1,0}; + PyObject* values[5] = {0,0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 130, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 130, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 130, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 130, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 130, __pyx_L3_error) + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + } + __pyx_v_model = values[0]; + __pyx_v_sentences = values[1]; + __pyx_v_alpha = values[2]; + __pyx_v__work = values[3]; + __pyx_v__l1 = values[4]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 130, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(__pyx_self, __pyx_v_model, __pyx_v_sentences, __pyx_v_alpha, __pyx_v__work, __pyx_v__l1); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__l1) { + int __pyx_v_hs; + int __pyx_v_negative; + int __pyx_v_sample; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; + int __pyx_v_size; + int __pyx_v_codelens[0x2710]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[0x2710]; + int __pyx_v_sentence_idx[(0x2710 + 1)]; + int __pyx_v_window; + int __pyx_v_i; + int __pyx_v_j; + int __pyx_v_k; + int __pyx_v_effective_words; + int __pyx_v_effective_sentences; + int __pyx_v_sent_idx; + int __pyx_v_idx_start; + int __pyx_v_idx_end; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; + __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg; + __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; + unsigned PY_LONG_LONG __pyx_v_cum_table_len; + unsigned PY_LONG_LONG __pyx_v_next_random; + __pyx_t_5numpy_uint32_t __pyx_v_subwords_idx_len[0x2710]; + __pyx_t_5numpy_uint32_t __pyx_v_subwords_idx[0x2710][0x3E8]; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_sent = NULL; + PyObject *__pyx_v_token = NULL; + PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_item = NULL; + __pyx_t_5numpy_uint32_t __pyx_v_word_idx; + PyObject *__pyx_v_word_subwords = NULL; + PyObject *__pyx_v_subword = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; + int __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + Py_ssize_t __pyx_t_11; + PyObject *(*__pyx_t_12)(PyObject *); + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + __pyx_t_5numpy_uint32_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + int __pyx_t_17; + PyObject *__pyx_t_18 = NULL; + int __pyx_t_19; + int __pyx_t_20; + int __pyx_t_21; + int __pyx_t_22; + __Pyx_RefNannySetupContext("train_batch_sg", 0); + + /* "gensim/models/fasttext_inner.pyx":131 + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): + * cdef int hs = model.hs # <<<<<<<<<<<<<< + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 131, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 131, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_hs = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":132 + * def train_batch_sg(model, sentences, alpha, _work, _l1): + * cdef int hs = model.hs + * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 132, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 132, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_negative = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":133 + * cdef int hs = model.hs + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 133, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":135 + * cdef int sample = (model.sample != 0) + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 135, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 135, __pyx_L1_error) + __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":136 + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 136, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 136, __pyx_L1_error) + __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":137 + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 137, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 137, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 137, __pyx_L1_error) + __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":138 + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< + * + * cdef REAL_t *work + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 138, __pyx_L1_error) + __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":143 + * cdef REAL_t *l1 + * + * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< + * cdef int size = model.layer1_size + * + */ + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 143, __pyx_L1_error) + __pyx_v__alpha = __pyx_t_4; + + /* "gensim/models/fasttext_inner.pyx":144 + * + * cdef REAL_t _alpha = alpha + * cdef int size = model.layer1_size # <<<<<<<<<<<<<< + * + * cdef int codelens[MAX_SENTENCE_LEN] + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_size = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":150 + * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] + * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] + * cdef int window = model.window # <<<<<<<<<<<<<< + * + * cdef int i, j, k + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 150, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 150, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_window = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":153 + * + * cdef int i, j, k + * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< + * cdef int sent_idx, idx_start, idx_end + * + */ + __pyx_v_effective_words = 0; + __pyx_v_effective_sentences = 0; + + /* "gensim/models/fasttext_inner.pyx":172 + * cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + * + * if hs: # <<<<<<<<<<<<<< + * syn1 = (np.PyArray_DATA(model.syn1)) + * + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":173 + * + * if hs: + * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< + * + * if negative: + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 173, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 173, __pyx_L1_error) + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":172 + * cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + * + * if hs: # <<<<<<<<<<<<<< + * syn1 = (np.PyArray_DATA(model.syn1)) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":175 + * syn1 = (np.PyArray_DATA(model.syn1)) + * + * if negative: # <<<<<<<<<<<<<< + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + */ + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":176 + * + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 176, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 176, __pyx_L1_error) + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":177 + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< + * cum_table_len = len(model.cum_table) + * if negative or sample: + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 177, __pyx_L1_error) + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":178 + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 178, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 178, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_cum_table_len = __pyx_t_6; + + /* "gensim/models/fasttext_inner.pyx":175 + * syn1 = (np.PyArray_DATA(model.syn1)) + * + * if negative: # <<<<<<<<<<<<<< + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + */ + } + + /* "gensim/models/fasttext_inner.pyx":179 + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_7 = (__pyx_v_negative != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L6_bool_binop_done; + } + __pyx_t_7 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_7; + __pyx_L6_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":180 + * cum_table_len = len(model.cum_table) + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_next_random = __pyx_t_9; + + /* "gensim/models/fasttext_inner.pyx":179 + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":183 + * + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< + * l1 = np.PyArray_DATA(_l1) + * + */ + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 183, __pyx_L1_error) + __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + + /* "gensim/models/fasttext_inner.pyx":184 + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) + * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< + * + * # prepare C structures so we can go "full C" and release the Python GIL + */ + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 184, __pyx_L1_error) + __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); + + /* "gensim/models/fasttext_inner.pyx":187 + * + * # prepare C structures so we can go "full C" and release the Python GIL + * vlookup = model.wv.vocab # <<<<<<<<<<<<<< + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + */ + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_vlookup = __pyx_t_3; + __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":188 + * # prepare C structures so we can go "full C" and release the Python GIL + * vlookup = model.wv.vocab + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< + * for sent in sentences: + * if not sent: + */ + (__pyx_v_sentence_idx[0]) = 0; + + /* "gensim/models/fasttext_inner.pyx":189 + * vlookup = model.wv.vocab + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: # <<<<<<<<<<<<<< + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + */ + if (likely(PyList_CheckExact(__pyx_v_sentences)) || PyTuple_CheckExact(__pyx_v_sentences)) { + __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 189, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_3))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 189, __pyx_L1_error) + #else + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 189, __pyx_L1_error) + #else + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 189, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + #endif + } + } else { + __pyx_t_8 = __pyx_t_10(__pyx_t_3); + if (unlikely(!__pyx_t_8)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 189, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_8); + } + __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); + __pyx_t_8 = 0; + + /* "gensim/models/fasttext_inner.pyx":190 + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + * if not sent: # <<<<<<<<<<<<<< + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + */ + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_7 = ((!__pyx_t_5) != 0); + if (__pyx_t_7) { + + /* "gensim/models/fasttext_inner.pyx":191 + * for sent in sentences: + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< + * for token in sent: + * word = vlookup[token] if token in vlookup else None + */ + goto __pyx_L8_continue; + + /* "gensim/models/fasttext_inner.pyx":190 + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + * if not sent: # <<<<<<<<<<<<<< + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + */ + } + + /* "gensim/models/fasttext_inner.pyx":192 + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + if (likely(PyList_CheckExact(__pyx_v_sent)) || PyTuple_CheckExact(__pyx_v_sent)) { + __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; + __pyx_t_12 = NULL; + } else { + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 192, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_12)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 192, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } else { + if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 192, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } + } else { + __pyx_t_1 = __pyx_t_12(__pyx_t_8); + if (unlikely(!__pyx_t_1)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 192, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_1); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":193 + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + */ + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) + if ((__pyx_t_7 != 0)) { + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 193, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __pyx_t_13; + __pyx_t_13 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_1 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":194 + * for token in sent: + * word = vlookup[token] if token in vlookup else None + * if word is None: # <<<<<<<<<<<<<< + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + */ + __pyx_t_7 = (__pyx_v_word == Py_None); + __pyx_t_5 = (__pyx_t_7 != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":195 + * word = vlookup[token] if token in vlookup else None + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< + * if sample and word.sample_int < random_int32(&next_random): + * continue + */ + goto __pyx_L11_continue; + + /* "gensim/models/fasttext_inner.pyx":194 + * for token in sent: + * word = vlookup[token] if token in vlookup else None + * if word is None: # <<<<<<<<<<<<<< + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + */ + } + + /* "gensim/models/fasttext_inner.pyx":196 + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[effective_words] = word.index + */ + __pyx_t_7 = (__pyx_v_sample != 0); + if (__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 196, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_5 = __pyx_t_7; + __pyx_L15_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":197 + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[effective_words] = word.index + * if hs: + */ + goto __pyx_L11_continue; + + /* "gensim/models/fasttext_inner.pyx":196 + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[effective_words] = word.index + */ + } + + /* "gensim/models/fasttext_inner.pyx":198 + * if sample and word.sample_int < random_int32(&next_random): + * continue + * indexes[effective_words] = word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[effective_words] = len(word.code) + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 198, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; + + /* "gensim/models/fasttext_inner.pyx":199 + * continue + * indexes[effective_words] = word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":200 + * indexes[effective_words] = word.index + * if hs: + * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< + * codes[effective_words] = np.PyArray_DATA(word.code) + * points[effective_words] = np.PyArray_DATA(word.point) + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 200, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_16 = PyObject_Length(__pyx_t_14); if (unlikely(__pyx_t_16 == ((Py_ssize_t)-1))) __PYX_ERR(0, 200, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_16); + + /* "gensim/models/fasttext_inner.pyx":201 + * if hs: + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[effective_words] = np.PyArray_DATA(word.point) + * + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 201, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_14))); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + + /* "gensim/models/fasttext_inner.pyx":202 + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * + * effective_words += 1 + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 202, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_14))); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + + /* "gensim/models/fasttext_inner.pyx":199 + * continue + * indexes[effective_words] = word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + */ + } + + /* "gensim/models/fasttext_inner.pyx":204 + * points[effective_words] = np.PyArray_DATA(word.point) + * + * effective_words += 1 # <<<<<<<<<<<<<< + * if effective_words == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_effective_words = (__pyx_v_effective_words + 1); + + /* "gensim/models/fasttext_inner.pyx":205 + * + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * + */ + __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":206 + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * + * # keep track of which words go into which sentence, so we don't train + */ + goto __pyx_L12_break; + + /* "gensim/models/fasttext_inner.pyx":205 + * + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":192 + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + __pyx_L11_continue:; + } + __pyx_L12_break:; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + + /* "gensim/models/fasttext_inner.pyx":211 + * # across sentence boundaries. + * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 218, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_14))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_14)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 218, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_14, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_14)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 218, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_14, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_10(__pyx_t_14); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 218, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_v_i = __pyx_t_2; + __pyx_t_2 = (__pyx_t_2 + 1); + + /* "gensim/models/fasttext_inner.pyx":219 + * # precompute "reduced window" offsets in a single randint() call + * for i, item in enumerate(model.random.randint(0, window, effective_words)): + * reduced_windows[i] = item # <<<<<<<<<<<<<< + * + * for i in range(effective_words): + */ + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 219, __pyx_L1_error) + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; + + /* "gensim/models/fasttext_inner.pyx":218 + * + * # precompute "reduced window" offsets in a single randint() call + * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item + * + */ + } + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + + /* "gensim/models/fasttext_inner.pyx":221 + * reduced_windows[i] = item + * + * for i in range(effective_words): # <<<<<<<<<<<<<< + * word_idx = indexes[i] + * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] + */ + __pyx_t_2 = __pyx_v_effective_words; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_i = __pyx_t_17; + + /* "gensim/models/fasttext_inner.pyx":222 + * + * for i in range(effective_words): + * word_idx = indexes[i] # <<<<<<<<<<<<<< + * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] + * subwords_idx_len[i] = len(word_subwords) + 1 + */ + __pyx_v_word_idx = (__pyx_v_indexes[__pyx_v_i]); + + /* "gensim/models/fasttext_inner.pyx":223 + * for i in range(effective_words): + * word_idx = indexes[i] + * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] # <<<<<<<<<<<<<< + * subwords_idx_len[i] = len(word_subwords) + 1 + * subwords_idx[i][0] = word_idx + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_index2word); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_GetItemInt(__pyx_t_18, __pyx_v_word_idx, __pyx_t_5numpy_uint32_t, 0, __Pyx_PyInt_From_npy_uint32, 0, 0, 1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __pyx_t_18 = PyObject_GetItem(__pyx_t_3, __pyx_t_14); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 223, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_18); + __pyx_t_18 = 0; + + /* "gensim/models/fasttext_inner.pyx":224 + * word_idx = indexes[i] + * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] + * subwords_idx_len[i] = len(word_subwords) + 1 # <<<<<<<<<<<<<< + * subwords_idx[i][0] = word_idx + * for j, subword in enumerate(word_subwords): + */ + __pyx_t_6 = PyObject_Length(__pyx_v_word_subwords); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 224, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_i]) = (__pyx_t_6 + 1); + + /* "gensim/models/fasttext_inner.pyx":225 + * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] + * subwords_idx_len[i] = len(word_subwords) + 1 + * subwords_idx[i][0] = word_idx # <<<<<<<<<<<<<< + * for j, subword in enumerate(word_subwords): + * subwords_idx[i][j+1] = model.wv.ngrams[subword] + */ + ((__pyx_v_subwords_idx[__pyx_v_i])[0]) = __pyx_v_word_idx; + + /* "gensim/models/fasttext_inner.pyx":226 + * subwords_idx_len[i] = len(word_subwords) + 1 + * subwords_idx[i][0] = word_idx + * for j, subword in enumerate(word_subwords): # <<<<<<<<<<<<<< + * subwords_idx[i][j+1] = model.wv.ngrams[subword] + * + */ + __pyx_t_19 = 0; + if (likely(PyList_CheckExact(__pyx_v_word_subwords)) || PyTuple_CheckExact(__pyx_v_word_subwords)) { + __pyx_t_18 = __pyx_v_word_subwords; __Pyx_INCREF(__pyx_t_18); __pyx_t_6 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_18 = PyObject_GetIter(__pyx_v_word_subwords); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 226, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_10 = Py_TYPE(__pyx_t_18)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 226, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_18))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_18)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_14 = PyList_GET_ITEM(__pyx_t_18, __pyx_t_6); __Pyx_INCREF(__pyx_t_14); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 226, __pyx_L1_error) + #else + __pyx_t_14 = PySequence_ITEM(__pyx_t_18, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 226, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_18)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_14 = PyTuple_GET_ITEM(__pyx_t_18, __pyx_t_6); __Pyx_INCREF(__pyx_t_14); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 226, __pyx_L1_error) + #else + __pyx_t_14 = PySequence_ITEM(__pyx_t_18, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 226, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + #endif + } + } else { + __pyx_t_14 = __pyx_t_10(__pyx_t_18); + if (unlikely(!__pyx_t_14)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 226, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_14); + } + __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_14); + __pyx_t_14 = 0; + __pyx_v_j = __pyx_t_19; + __pyx_t_19 = (__pyx_t_19 + 1); + + /* "gensim/models/fasttext_inner.pyx":227 + * subwords_idx[i][0] = word_idx + * for j, subword in enumerate(word_subwords): + * subwords_idx[i][j+1] = model.wv.ngrams[subword] # <<<<<<<<<<<<<< + * + * with nogil: + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 227, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 227, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = PyObject_GetItem(__pyx_t_3, __pyx_v_subword); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 227, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 227, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + ((__pyx_v_subwords_idx[__pyx_v_i])[(__pyx_v_j + 1)]) = __pyx_t_15; + + /* "gensim/models/fasttext_inner.pyx":226 + * subwords_idx_len[i] = len(word_subwords) + 1 + * subwords_idx[i][0] = word_idx + * for j, subword in enumerate(word_subwords): # <<<<<<<<<<<<<< + * subwords_idx[i][j+1] = model.wv.ngrams[subword] + * + */ + } + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + } + + /* "gensim/models/fasttext_inner.pyx":229 + * subwords_idx[i][j+1] = model.wv.ngrams[subword] + * + * with nogil: # <<<<<<<<<<<<<< + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + */ + { + #ifdef WITH_THREAD + PyThreadState *_save; + Py_UNBLOCK_THREADS + __Pyx_FastGIL_Remember(); + #endif + /*try:*/ { + + /* "gensim/models/fasttext_inner.pyx":230 + * + * with nogil: + * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] + */ + __pyx_t_2 = __pyx_v_effective_sentences; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_sent_idx = __pyx_t_17; + + /* "gensim/models/fasttext_inner.pyx":231 + * with nogil: + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): + */ + __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); + + /* "gensim/models/fasttext_inner.pyx":232 + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + */ + __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); + + /* "gensim/models/fasttext_inner.pyx":233 + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< + * j = i - window + reduced_windows[i] + * if j < idx_start: + */ + __pyx_t_19 = __pyx_v_idx_end; + for (__pyx_t_20 = __pyx_v_idx_start; __pyx_t_20 < __pyx_t_19; __pyx_t_20+=1) { + __pyx_v_i = __pyx_t_20; + + /* "gensim/models/fasttext_inner.pyx":234 + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< + * if j < idx_start: + * j = idx_start + */ + __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); + + /* "gensim/models/fasttext_inner.pyx":235 + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + * if j < idx_start: # <<<<<<<<<<<<<< + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + */ + __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":236 + * j = i - window + reduced_windows[i] + * if j < idx_start: + * j = idx_start # <<<<<<<<<<<<<< + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: + */ + __pyx_v_j = __pyx_v_idx_start; + + /* "gensim/models/fasttext_inner.pyx":235 + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + * if j < idx_start: # <<<<<<<<<<<<<< + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + */ + } + + /* "gensim/models/fasttext_inner.pyx":237 + * if j < idx_start: + * j = idx_start + * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< + * if k > idx_end: + * k = idx_end + */ + __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); + + /* "gensim/models/fasttext_inner.pyx":238 + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: # <<<<<<<<<<<<<< + * k = idx_end + * for j in range(j, k): + */ + __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":239 + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: + * k = idx_end # <<<<<<<<<<<<<< + * for j in range(j, k): + * if j == i: + */ + __pyx_v_k = __pyx_v_idx_end; + + /* "gensim/models/fasttext_inner.pyx":238 + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: # <<<<<<<<<<<<<< + * k = idx_end + * for j in range(j, k): + */ + } + + /* "gensim/models/fasttext_inner.pyx":240 + * if k > idx_end: + * k = idx_end + * for j in range(j, k): # <<<<<<<<<<<<<< + * if j == i: + * continue + */ + __pyx_t_21 = __pyx_v_k; + for (__pyx_t_22 = __pyx_v_j; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { + __pyx_v_j = __pyx_t_22; + + /* "gensim/models/fasttext_inner.pyx":241 + * k = idx_end + * for j in range(j, k): + * if j == i: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":242 + * for j in range(j, k): + * if j == i: + * continue # <<<<<<<<<<<<<< + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + */ + goto __pyx_L35_continue; + + /* "gensim/models/fasttext_inner.pyx":241 + * k = idx_end + * for j in range(j, k): + * if j == i: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + } + + /* "gensim/models/fasttext_inner.pyx":243 + * if j == i: + * continue + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":244 + * continue + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< + * if negative: + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + */ + __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); + + /* "gensim/models/fasttext_inner.pyx":243 + * if j == i: + * continue + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + */ + } + + /* "gensim/models/fasttext_inner.pyx":245 + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + */ + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":246 + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< + * + * return effective_words + */ + __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); + + /* "gensim/models/fasttext_inner.pyx":245 + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + */ + } + __pyx_L35_continue:; + } + } + } + } + + /* "gensim/models/fasttext_inner.pyx":229 + * subwords_idx[i][j+1] = model.wv.ngrams[subword] + * + * with nogil: # <<<<<<<<<<<<<< + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + */ + /*finally:*/ { + /*normal exit:*/{ + #ifdef WITH_THREAD + __Pyx_FastGIL_Forget(); + Py_BLOCK_THREADS + #endif + goto __pyx_L28; + } + __pyx_L28:; + } + } + + /* "gensim/models/fasttext_inner.pyx":248 + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + * return effective_words # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_18 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 248, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_18); + __pyx_r = __pyx_t_18; + __pyx_t_18 = 0; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":130 + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_18); + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_sent); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_word_subwords); + __Pyx_XDECREF(__pyx_v_subword); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "gensim/models/fasttext_inner.pyx":251 + * + * + * def init(): # <<<<<<<<<<<<<< + * """ + * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_6gensim_6models_14fasttext_inner_2init[] = "\n Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized\n into table EXP_TABLE. Also calculate log(sigmoid(x)) into LOG_TABLE.\n "; +static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_3init = {"init", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_3init, METH_NOARGS, __pyx_doc_6gensim_6models_14fasttext_inner_2init}; +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("init (wrapper)", 0); + __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_2init(__pyx_self); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED PyObject *__pyx_self) { + int __pyx_v_i; + float *__pyx_v_x; + float *__pyx_v_y; + float __pyx_v_expected; + int __pyx_v_size; + double __pyx_v_d_res; + float *__pyx_v_p_res; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + float __pyx_t_1[1]; + float __pyx_t_2[1]; + int __pyx_t_3; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; + int __pyx_t_5; + __Pyx_RefNannySetupContext("init", 0); + + /* "gensim/models/fasttext_inner.pyx":260 + * + * cdef int i + * cdef float *x = [10.0] # <<<<<<<<<<<<<< + * cdef float *y = [0.01] + * cdef float expected = 0.1 + */ + __pyx_t_1[0] = ((float)10.0); + __pyx_v_x = __pyx_t_1; + + /* "gensim/models/fasttext_inner.pyx":261 + * cdef int i + * cdef float *x = [10.0] + * cdef float *y = [0.01] # <<<<<<<<<<<<<< + * cdef float expected = 0.1 + * cdef int size = 1 + */ + __pyx_t_2[0] = ((float)0.01); + __pyx_v_y = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":262 + * cdef float *x = [10.0] + * cdef float *y = [0.01] + * cdef float expected = 0.1 # <<<<<<<<<<<<<< + * cdef int size = 1 + * cdef double d_res + */ + __pyx_v_expected = ((float)0.1); + + /* "gensim/models/fasttext_inner.pyx":263 + * cdef float *y = [0.01] + * cdef float expected = 0.1 + * cdef int size = 1 # <<<<<<<<<<<<<< + * cdef double d_res + * cdef float *p_res + */ + __pyx_v_size = 1; + + /* "gensim/models/fasttext_inner.pyx":268 + * + * # build the sigmoid table + * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< + * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) + * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) + */ + for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { + __pyx_v_i = __pyx_t_3; + + /* "gensim/models/fasttext_inner.pyx":269 + * # build the sigmoid table + * for i in range(EXP_TABLE_SIZE): + * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< + * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) + * LOG_TABLE[i] = log( EXP_TABLE[i] ) + */ + (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); + + /* "gensim/models/fasttext_inner.pyx":270 + * for i in range(EXP_TABLE_SIZE): + * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) + * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< + * LOG_TABLE[i] = log( EXP_TABLE[i] ) + * + */ + __pyx_t_4 = ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0); + if (unlikely(__pyx_t_4 == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 270, __pyx_L1_error) + } + (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / __pyx_t_4)); + + /* "gensim/models/fasttext_inner.pyx":271 + * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) + * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) + * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< + * + * # check whether sdot returns double or float + */ + (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); + } + + /* "gensim/models/fasttext_inner.pyx":274 + * + * # check whether sdot returns double or float + * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< + * p_res = &d_res + * if (abs(d_res - expected) < 0.0001): + */ + __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":275 + * # check whether sdot returns double or float + * d_res = dsdot(&size, x, &ONE, y, &ONE) + * p_res = &d_res # <<<<<<<<<<<<<< + * if (abs(d_res - expected) < 0.0001): + * our_dot = our_dot_double + */ + __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); + + /* "gensim/models/fasttext_inner.pyx":276 + * d_res = dsdot(&size, x, &ONE, y, &ONE) + * p_res = &d_res + * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< + * our_dot = our_dot_double + * our_saxpy = saxpy + */ + __pyx_t_5 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":277 + * p_res = &d_res + * if (abs(d_res - expected) < 0.0001): + * our_dot = our_dot_double # <<<<<<<<<<<<<< + * our_saxpy = saxpy + * return 0 # double + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; + + /* "gensim/models/fasttext_inner.pyx":278 + * if (abs(d_res - expected) < 0.0001): + * our_dot = our_dot_double + * our_saxpy = saxpy # <<<<<<<<<<<<<< + * return 0 # double + * elif (abs(p_res[0] - expected) < 0.0001): + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; + + /* "gensim/models/fasttext_inner.pyx":279 + * our_dot = our_dot_double + * our_saxpy = saxpy + * return 0 # double # <<<<<<<<<<<<<< + * elif (abs(p_res[0] - expected) < 0.0001): + * our_dot = our_dot_float + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_0); + __pyx_r = __pyx_int_0; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":276 + * d_res = dsdot(&size, x, &ONE, y, &ONE) + * p_res = &d_res + * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< + * our_dot = our_dot_double + * our_saxpy = saxpy + */ + } + + /* "gensim/models/fasttext_inner.pyx":280 + * our_saxpy = saxpy + * return 0 # double + * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< + * our_dot = our_dot_float + * our_saxpy = saxpy + */ + __pyx_t_5 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":281 + * return 0 # double + * elif (abs(p_res[0] - expected) < 0.0001): + * our_dot = our_dot_float # <<<<<<<<<<<<<< + * our_saxpy = saxpy + * return 1 # float + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; + + /* "gensim/models/fasttext_inner.pyx":282 + * elif (abs(p_res[0] - expected) < 0.0001): + * our_dot = our_dot_float + * our_saxpy = saxpy # <<<<<<<<<<<<<< + * return 1 # float + * else: + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; + + /* "gensim/models/fasttext_inner.pyx":283 + * our_dot = our_dot_float + * our_saxpy = saxpy + * return 1 # float # <<<<<<<<<<<<<< + * else: + * # neither => use cython loops, no BLAS + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_1); + __pyx_r = __pyx_int_1; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":280 + * our_saxpy = saxpy + * return 0 # double + * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< + * our_dot = our_dot_float + * our_saxpy = saxpy + */ + } + + /* "gensim/models/fasttext_inner.pyx":287 + * # neither => use cython loops, no BLAS + * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here + * our_dot = our_dot_noblas # <<<<<<<<<<<<<< + * our_saxpy = our_saxpy_noblas + * return 2 + */ + /*else*/ { + __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; + + /* "gensim/models/fasttext_inner.pyx":288 + * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< + * return 2 + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; + + /* "gensim/models/fasttext_inner.pyx":289 + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas + * return 2 # <<<<<<<<<<<<<< + * + * FAST_VERSION = init() # initialize the module + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_2); + __pyx_r = __pyx_int_2; + goto __pyx_L0; + } + + /* "gensim/models/fasttext_inner.pyx":251 + * + * + * def init(): # <<<<<<<<<<<<<< + * """ + * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_AddTraceback("gensim.models.fasttext_inner.init", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + * # experimental exception made for __getbuffer__ and __releasebuffer__ + * # -- the details of this may change. + * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fullfill the PEP. + */ + +/* Python wrapper */ +static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ +static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); + __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { + int __pyx_v_copy_shape; + int __pyx_v_i; + int __pyx_v_ndim; + int __pyx_v_endian_detector; + int __pyx_v_little_endian; + int __pyx_v_t; + char *__pyx_v_f; + PyArray_Descr *__pyx_v_descr = 0; + int __pyx_v_offset; + int __pyx_v_hasfields; + int __pyx_r; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + char *__pyx_t_7; + __Pyx_RefNannySetupContext("__getbuffer__", 0); + if (__pyx_v_info != NULL) { + __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(__pyx_v_info->obj); + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 + * # of flags + * + * if info == NULL: return # <<<<<<<<<<<<<< + * + * cdef int copy_shape, i, ndim + */ + __pyx_t_1 = ((__pyx_v_info == NULL) != 0); + if (__pyx_t_1) { + __pyx_r = 0; + goto __pyx_L0; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":223 + * + * cdef int copy_shape, i, ndim + * cdef int endian_detector = 1 # <<<<<<<<<<<<<< + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * + */ + __pyx_v_endian_detector = 1; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 + * cdef int copy_shape, i, ndim + * cdef int endian_detector = 1 + * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< + * + * ndim = PyArray_NDIM(self) + */ + __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * + * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 + * ndim = PyArray_NDIM(self) + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * copy_shape = 1 + * else: + */ + __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * copy_shape = 1 # <<<<<<<<<<<<<< + * else: + * copy_shape = 0 + */ + __pyx_v_copy_shape = 1; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":228 + * ndim = PyArray_NDIM(self) + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * copy_shape = 1 + * else: + */ + goto __pyx_L4; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 + * copy_shape = 1 + * else: + * copy_shape = 0 # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + */ + /*else*/ { + __pyx_v_copy_shape = 0; + } + __pyx_L4:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + __pyx_t_2 = (((__pyx_v_flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L6_bool_binop_done; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":234 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< + * raise ValueError(u"ndarray is not C contiguous") + * + */ + __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS) != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L6_bool_binop_done:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 235, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 235, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + __pyx_t_2 = (((__pyx_v_flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L9_bool_binop_done; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< + * raise ValueError(u"ndarray is not Fortran contiguous") + * + */ + __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS) != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L9_bool_binop_done:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< + * + * info.buf = PyArray_DATA(self) + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 239, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 239, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":241 + * raise ValueError(u"ndarray is not Fortran contiguous") + * + * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< + * info.ndim = ndim + * if copy_shape: + */ + __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 + * + * info.buf = PyArray_DATA(self) + * info.ndim = ndim # <<<<<<<<<<<<<< + * if copy_shape: + * # Allocate new buffer for strides and shape info. + */ + __pyx_v_info->ndim = __pyx_v_ndim; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + * info.buf = PyArray_DATA(self) + * info.ndim = ndim + * if copy_shape: # <<<<<<<<<<<<<< + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + */ + __pyx_t_1 = (__pyx_v_copy_shape != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":246 + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) # <<<<<<<<<<<<<< + * info.shape = info.strides + ndim + * for i in range(ndim): + */ + __pyx_v_info->strides = ((Py_ssize_t *)PyObject_Malloc((((sizeof(Py_ssize_t)) * 2) * ((size_t)__pyx_v_ndim)))); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 + * # This is allocated as one block, strides first. + * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) + * info.shape = info.strides + ndim # <<<<<<<<<<<<<< + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] + */ + __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":248 + * info.strides = PyObject_Malloc(sizeof(Py_ssize_t) * 2 * ndim) + * info.shape = info.strides + ndim + * for i in range(ndim): # <<<<<<<<<<<<<< + * info.strides[i] = PyArray_STRIDES(self)[i] + * info.shape[i] = PyArray_DIMS(self)[i] + */ + __pyx_t_4 = __pyx_v_ndim; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_i = __pyx_t_5; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 + * info.shape = info.strides + ndim + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< + * info.shape[i] = PyArray_DIMS(self)[i] + * else: + */ + (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] + * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< + * else: + * info.strides = PyArray_STRIDES(self) + */ + (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]); + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + * info.buf = PyArray_DATA(self) + * info.ndim = ndim + * if copy_shape: # <<<<<<<<<<<<<< + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + */ + goto __pyx_L11; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":252 + * info.shape[i] = PyArray_DIMS(self)[i] + * else: + * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL + */ + /*else*/ { + __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 + * else: + * info.strides = PyArray_STRIDES(self) + * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) + */ + __pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(__pyx_v_self)); + } + __pyx_L11:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 + * info.strides = PyArray_STRIDES(self) + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL # <<<<<<<<<<<<<< + * info.itemsize = PyArray_ITEMSIZE(self) + * info.readonly = not PyArray_ISWRITEABLE(self) + */ + __pyx_v_info->suboffsets = NULL; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":255 + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< + * info.readonly = not PyArray_ISWRITEABLE(self) + * + */ + __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) + * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< + * + * cdef int t + */ + __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + * + * cdef int t + * cdef char* f = NULL # <<<<<<<<<<<<<< + * cdef dtype descr = self.descr + * cdef int offset + */ + __pyx_v_f = NULL; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + * cdef int t + * cdef char* f = NULL + * cdef dtype descr = self.descr # <<<<<<<<<<<<<< + * cdef int offset + * + */ + __pyx_t_3 = ((PyObject *)__pyx_v_self->descr); + __Pyx_INCREF(__pyx_t_3); + __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); + __pyx_t_3 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 + * cdef int offset + * + * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< + * + * if not hasfields and not copy_shape: + */ + __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + * cdef bint hasfields = PyDataType_HASFIELDS(descr) + * + * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< + * # do not call releasebuffer + * info.obj = None + */ + __pyx_t_2 = ((!(__pyx_v_hasfields != 0)) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_2 = ((!(__pyx_v_copy_shape != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L15_bool_binop_done:; + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 + * if not hasfields and not copy_shape: + * # do not call releasebuffer + * info.obj = None # <<<<<<<<<<<<<< + * else: + * # need to call releasebuffer + */ + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); + __pyx_v_info->obj = Py_None; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + * cdef bint hasfields = PyDataType_HASFIELDS(descr) + * + * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< + * # do not call releasebuffer + * info.obj = None + */ + goto __pyx_L14; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 + * else: + * # need to call releasebuffer + * info.obj = self # <<<<<<<<<<<<<< + * + * if not hasfields: + */ + /*else*/ { + __Pyx_INCREF(((PyObject *)__pyx_v_self)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); + __pyx_v_info->obj = ((PyObject *)__pyx_v_self); + } + __pyx_L14:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + * info.obj = self + * + * if not hasfields: # <<<<<<<<<<<<<< + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or + */ + __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + * + * if not hasfields: + * t = descr.type_num # <<<<<<<<<<<<<< + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + */ + __pyx_t_4 = __pyx_v_descr->type_num; + __pyx_v_t = __pyx_t_4; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + __pyx_t_2 = ((__pyx_v_descr->byteorder == '>') != 0); + if (!__pyx_t_2) { + goto __pyx_L20_next_or; + } else { + } + __pyx_t_2 = (__pyx_v_little_endian != 0); + if (!__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L19_bool_binop_done; + } + __pyx_L20_next_or:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" + */ + __pyx_t_2 = ((__pyx_v_descr->byteorder == '<') != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L19_bool_binop_done; + } + __pyx_t_2 = ((!(__pyx_v_little_endian != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L19_bool_binop_done:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 276, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 276, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" + */ + switch (__pyx_v_t) { + case NPY_BYTE: + __pyx_v_f = ((char *)"b"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":278 + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" + */ + case NPY_UBYTE: + __pyx_v_f = ((char *)"B"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< + * elif t == NPY_USHORT: f = "H" + * elif t == NPY_INT: f = "i" + */ + case NPY_SHORT: + __pyx_v_f = ((char *)"h"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" + */ + case NPY_USHORT: + __pyx_v_f = ((char *)"H"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" + * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" + */ + case NPY_INT: + __pyx_v_f = ((char *)"i"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":282 + * elif t == NPY_USHORT: f = "H" + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" + */ + case NPY_UINT: + __pyx_v_f = ((char *)"I"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" + */ + case NPY_LONG: + __pyx_v_f = ((char *)"l"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" + */ + case NPY_ULONG: + __pyx_v_f = ((char *)"L"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" + */ + case NPY_LONGLONG: + __pyx_v_f = ((char *)"q"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" + */ + case NPY_ULONGLONG: + __pyx_v_f = ((char *)"Q"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":287 + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" + */ + case NPY_FLOAT: + __pyx_v_f = ((char *)"f"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":288 + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" + */ + case NPY_DOUBLE: + __pyx_v_f = ((char *)"d"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" + */ + case NPY_LONGDOUBLE: + __pyx_v_f = ((char *)"g"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":290 + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" + */ + case NPY_CFLOAT: + __pyx_v_f = ((char *)"Zf"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< + * elif t == NPY_CLONGDOUBLE: f = "Zg" + * elif t == NPY_OBJECT: f = "O" + */ + case NPY_CDOUBLE: + __pyx_v_f = ((char *)"Zd"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< + * elif t == NPY_OBJECT: f = "O" + * else: + */ + case NPY_CLONGDOUBLE: + __pyx_v_f = ((char *)"Zg"); + break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" + * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + */ + case NPY_OBJECT: + __pyx_v_f = ((char *)"O"); + break; + default: + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + * elif t == NPY_OBJECT: f = "O" + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< + * info.format = f + * return + */ + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_3); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6); + __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 295, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_Raise(__pyx_t_6, 0, 0, 0); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __PYX_ERR(1, 295, __pyx_L1_error) + break; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":296 + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * info.format = f # <<<<<<<<<<<<<< + * return + * else: + */ + __pyx_v_info->format = __pyx_v_f; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":297 + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * info.format = f + * return # <<<<<<<<<<<<<< + * else: + * info.format = PyObject_Malloc(_buffer_format_string_len) + */ + __pyx_r = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + * info.obj = self + * + * if not hasfields: # <<<<<<<<<<<<<< + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":299 + * return + * else: + * info.format = PyObject_Malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 + */ + /*else*/ { + __pyx_v_info->format = ((char *)PyObject_Malloc(0xFF)); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":300 + * else: + * info.format = PyObject_Malloc(_buffer_format_string_len) + * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< + * offset = 0 + * f = _util_dtypestring(descr, info.format + 1, + */ + (__pyx_v_info->format[0]) = '^'; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":301 + * info.format = PyObject_Malloc(_buffer_format_string_len) + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 # <<<<<<<<<<<<<< + * f = _util_dtypestring(descr, info.format + 1, + * info.format + _buffer_format_string_len, + */ + __pyx_v_offset = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":302 + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 + * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< + * info.format + _buffer_format_string_len, + * &offset) + */ + __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(1, 302, __pyx_L1_error) + __pyx_v_f = __pyx_t_7; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":305 + * info.format + _buffer_format_string_len, + * &offset) + * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + */ + (__pyx_v_f[0]) = '\x00'; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + * # experimental exception made for __getbuffer__ and __releasebuffer__ + * # -- the details of this may change. + * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fullfill the PEP. + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("numpy.ndarray.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + if (__pyx_v_info != NULL && __pyx_v_info->obj != NULL) { + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = NULL; + } + goto __pyx_L2; + __pyx_L0:; + if (__pyx_v_info != NULL && __pyx_v_info->obj == Py_None) { + __Pyx_GOTREF(Py_None); + __Pyx_DECREF(Py_None); __pyx_v_info->obj = NULL; + } + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_descr); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 + * f[0] = c'\0' # Terminate format string + * + * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< + * if PyArray_HASFIELDS(self): + * PyObject_Free(info.format) + */ + +/* Python wrapper */ +static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/ +static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0); + __pyx_pf_5numpy_7ndarray_2__releasebuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info) { + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("__releasebuffer__", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< + * PyObject_Free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":309 + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): + * PyObject_Free(info.format) # <<<<<<<<<<<<<< + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * PyObject_Free(info.strides) + */ + PyObject_Free(__pyx_v_info->format); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":308 + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< + * PyObject_Free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 + * if PyArray_HASFIELDS(self): + * PyObject_Free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * PyObject_Free(info.strides) + * # info.shape was stored after info.strides in the same block + */ + __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":311 + * PyObject_Free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * PyObject_Free(info.strides) # <<<<<<<<<<<<<< + * # info.shape was stored after info.strides in the same block + * + */ + PyObject_Free(__pyx_v_info->strides); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":310 + * if PyArray_HASFIELDS(self): + * PyObject_Free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * PyObject_Free(info.strides) + * # info.shape was stored after info.strides in the same block + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":307 + * f[0] = c'\0' # Terminate format string + * + * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< + * if PyArray_HASFIELDS(self): + * PyObject_Free(info.format) + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":789 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 789, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":792 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 792, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 795, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 798, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 801, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":800 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); + __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":807 + * return d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":803 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 + * return () + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< + * # Recursive utility function used in __getbuffer__ to get format + * # string. The new location in the format string is returned. + */ + +static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx_v_descr, char *__pyx_v_f, char *__pyx_v_end, int *__pyx_v_offset) { + PyArray_Descr *__pyx_v_child = 0; + int __pyx_v_endian_detector; + int __pyx_v_little_endian; + PyObject *__pyx_v_fields = 0; + PyObject *__pyx_v_childname = NULL; + PyObject *__pyx_v_new_offset = NULL; + PyObject *__pyx_v_t = NULL; + char *__pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + int __pyx_t_5; + int __pyx_t_6; + int __pyx_t_7; + long __pyx_t_8; + char *__pyx_t_9; + __Pyx_RefNannySetupContext("_util_dtypestring", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":814 + * + * cdef dtype child + * cdef int endian_detector = 1 # <<<<<<<<<<<<<< + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * cdef tuple fields + */ + __pyx_v_endian_detector = 1; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":815 + * cdef dtype child + * cdef int endian_detector = 1 + * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< + * cdef tuple fields + * + */ + __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + * cdef tuple fields + * + * for childname in descr.names: # <<<<<<<<<<<<<< + * fields = descr.fields[childname] + * child, new_offset = fields + */ + if (unlikely(__pyx_v_descr->names == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); + __PYX_ERR(1, 818, __pyx_L1_error) + } + __pyx_t_1 = __pyx_v_descr->names; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; + for (;;) { + if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(1, 818, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 818, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); + __pyx_t_3 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 + * + * for childname in descr.names: + * fields = descr.fields[childname] # <<<<<<<<<<<<<< + * child, new_offset = fields + * + */ + if (unlikely(__pyx_v_descr->fields == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(1, 819, __pyx_L1_error) + } + __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 819, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_t_3)->tp_name), 0))) __PYX_ERR(1, 819, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); + __pyx_t_3 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":820 + * for childname in descr.names: + * fields = descr.fields[childname] + * child, new_offset = fields # <<<<<<<<<<<<<< + * + * if (end - f) - (new_offset - offset[0]) < 15: + */ + if (likely(__pyx_v_fields != Py_None)) { + PyObject* sequence = __pyx_v_fields; + #if !CYTHON_COMPILING_IN_PYPY + Py_ssize_t size = Py_SIZE(sequence); + #else + Py_ssize_t size = PySequence_Size(sequence); + #endif + if (unlikely(size != 2)) { + if (size > 2) __Pyx_RaiseTooManyValuesError(2); + else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); + __PYX_ERR(1, 820, __pyx_L1_error) + } + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(__pyx_t_4); + #else + __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 820, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 820, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + #endif + } else { + __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 820, __pyx_L1_error) + } + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) __PYX_ERR(1, 820, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_child, ((PyArray_Descr *)__pyx_t_3)); + __pyx_t_3 = 0; + __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); + __pyx_t_4 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 + * child, new_offset = fields + * + * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + */ + __pyx_t_4 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 822, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 822, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 822, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); + if (__pyx_t_6) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + * + * if (end - f) - (new_offset - offset[0]) < 15: + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< + * + * if ((child.byteorder == c'>' and little_endian) or + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 823, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 823, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":822 + * child, new_offset = fields + * + * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + __pyx_t_7 = ((__pyx_v_child->byteorder == '>') != 0); + if (!__pyx_t_7) { + goto __pyx_L8_next_or; + } else { + } + __pyx_t_7 = (__pyx_v_little_endian != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_6 = __pyx_t_7; + goto __pyx_L7_bool_binop_done; + } + __pyx_L8_next_or:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + * + * if ((child.byteorder == c'>' and little_endian) or + * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< + * raise ValueError(u"Non-native byte order not supported") + * # One could encode it in the format string and have Cython + */ + __pyx_t_7 = ((__pyx_v_child->byteorder == '<') != 0); + if (__pyx_t_7) { + } else { + __pyx_t_6 = __pyx_t_7; + goto __pyx_L7_bool_binop_done; + } + __pyx_t_7 = ((!(__pyx_v_little_endian != 0)) != 0); + __pyx_t_6 = __pyx_t_7; + __pyx_L7_bool_binop_done:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + if (__pyx_t_6) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 + * if ((child.byteorder == c'>' and little_endian) or + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * # One could encode it in the format string and have Cython + * # complain instead, BUT: < and > in format strings also imply + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 827, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 + * + * # Output padding bytes + * while offset[0] < new_offset: # <<<<<<<<<<<<<< + * f[0] = 120 # "x"; pad byte + * f += 1 + */ + while (1) { + __pyx_t_3 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_v_new_offset, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (!__pyx_t_6) break; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 + * # Output padding bytes + * while offset[0] < new_offset: + * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< + * f += 1 + * offset[0] += 1 + */ + (__pyx_v_f[0]) = 0x78; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 + * while offset[0] < new_offset: + * f[0] = 120 # "x"; pad byte + * f += 1 # <<<<<<<<<<<<<< + * offset[0] += 1 + * + */ + __pyx_v_f = (__pyx_v_f + 1); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 + * f[0] = 120 # "x"; pad byte + * f += 1 + * offset[0] += 1 # <<<<<<<<<<<<<< + * + * offset[0] += child.itemsize + */ + __pyx_t_8 = 0; + (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 + * offset[0] += 1 + * + * offset[0] += child.itemsize # <<<<<<<<<<<<<< + * + * if not PyDataType_HASFIELDS(child): + */ + __pyx_t_8 = 0; + (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + * offset[0] += child.itemsize + * + * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< + * t = child.type_num + * if end - f < 5: + */ + __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); + if (__pyx_t_6) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 + * + * if not PyDataType_HASFIELDS(child): + * t = child.type_num # <<<<<<<<<<<<<< + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") + */ + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_child->type_num); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 845, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); + __pyx_t_4 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 + * if not PyDataType_HASFIELDS(child): + * t = child.type_num + * if end - f < 5: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short.") + * + */ + __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); + if (__pyx_t_6) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + * t = child.type_num + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< + * + * # Until ticket #99 is fixed, use integers to avoid warnings + */ + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 847, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_Raise(__pyx_t_4, 0, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __PYX_ERR(1, 847, __pyx_L1_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":846 + * if not PyDataType_HASFIELDS(child): + * t = child.type_num + * if end - f < 5: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short.") + * + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":850 + * + * # Until ticket #99 is fixed, use integers to avoid warnings + * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_BYTE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 850, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 850, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 850, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 98; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":851 + * # Until ticket #99 is fixed, use integers to avoid warnings + * if t == NPY_BYTE: f[0] = 98 #"b" + * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UBYTE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 851, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 851, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 851, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 66; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 + * if t == NPY_BYTE: f[0] = 98 #"b" + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_SHORT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 852, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 852, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 852, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x68; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_USHORT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 853, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 853, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 853, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 72; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":854 + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_INT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 854, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 854, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 854, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x69; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":855 + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UINT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 855, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 855, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 855, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 73; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":856 + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 856, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 856, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 856, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x6C; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":857 + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 857, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 857, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 857, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 76; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":858 + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGLONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 858, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 858, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 858, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x71; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":859 + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONGLONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 859, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 859, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 859, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 81; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":860 + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_FLOAT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 860, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 860, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 860, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x66; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":861 + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_DOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 861, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 861, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 861, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x64; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":862 + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 862, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 862, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 862, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x67; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":863 + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CFLOAT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 863, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 863, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 863, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x66; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":864 + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + * elif t == NPY_OBJECT: f[0] = 79 #"O" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 864, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 864, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 864, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x64; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":865 + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< + * elif t == NPY_OBJECT: f[0] = 79 #"O" + * else: + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 865, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 865, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 865, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x67; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":866 + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_OBJECT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 866, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 866, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 866, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 79; + goto __pyx_L15; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":868 + * elif t == NPY_OBJECT: f[0] = 79 #"O" + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< + * f += 1 + * else: + */ + /*else*/ { + __pyx_t_3 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 868, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 868, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 868, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 868, __pyx_L1_error) + } + __pyx_L15:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":869 + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * f += 1 # <<<<<<<<<<<<<< + * else: + * # Cython ignores struct boundary information ("T{...}"), + */ + __pyx_v_f = (__pyx_v_f + 1); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + * offset[0] += child.itemsize + * + * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< + * t = child.type_num + * if end - f < 5: + */ + goto __pyx_L13; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":873 + * # Cython ignores struct boundary information ("T{...}"), + * # so don't output it + * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< + * return f + * + */ + /*else*/ { + __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_9 == ((char *)NULL))) __PYX_ERR(1, 873, __pyx_L1_error) + __pyx_v_f = __pyx_t_9; + } + __pyx_L13:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + * cdef tuple fields + * + * for childname in descr.names: # <<<<<<<<<<<<<< + * fields = descr.fields[childname] + * child, new_offset = fields + */ + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":874 + * # so don't output it + * f = _util_dtypestring(child, f, end, offset) + * return f # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = __pyx_v_f; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":809 + * return () + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< + * # Recursive utility function used in __getbuffer__ to get format + * # string. The new location in the format string is returned. + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("numpy._util_dtypestring", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_child); + __Pyx_XDECREF(__pyx_v_fields); + __Pyx_XDECREF(__pyx_v_childname); + __Pyx_XDECREF(__pyx_v_new_offset); + __Pyx_XDECREF(__pyx_v_t); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 + * + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * cdef PyObject* baseptr + * if base is None: + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + PyObject *__pyx_v_baseptr; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + __Pyx_RefNannySetupContext("set_array_base", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 + * cdef inline void set_array_base(ndarray arr, object base): + * cdef PyObject* baseptr + * if base is None: # <<<<<<<<<<<<<< + * baseptr = NULL + * else: + */ + __pyx_t_1 = (__pyx_v_base == Py_None); + __pyx_t_2 = (__pyx_t_1 != 0); + if (__pyx_t_2) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + * cdef PyObject* baseptr + * if base is None: + * baseptr = NULL # <<<<<<<<<<<<<< + * else: + * Py_INCREF(base) # important to do this before decref below! + */ + __pyx_v_baseptr = NULL; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":992 + * cdef inline void set_array_base(ndarray arr, object base): + * cdef PyObject* baseptr + * if base is None: # <<<<<<<<<<<<<< + * baseptr = NULL + * else: + */ + goto __pyx_L3; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":995 + * baseptr = NULL + * else: + * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< + * baseptr = base + * Py_XDECREF(arr.base) + */ + /*else*/ { + Py_INCREF(__pyx_v_base); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":996 + * else: + * Py_INCREF(base) # important to do this before decref below! + * baseptr = base # <<<<<<<<<<<<<< + * Py_XDECREF(arr.base) + * arr.base = baseptr + */ + __pyx_v_baseptr = ((PyObject *)__pyx_v_base); + } + __pyx_L3:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + * Py_INCREF(base) # important to do this before decref below! + * baseptr = base + * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< + * arr.base = baseptr + * + */ + Py_XDECREF(__pyx_v_arr->base); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":998 + * baseptr = base + * Py_XDECREF(arr.base) + * arr.base = baseptr # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + __pyx_v_arr->base = __pyx_v_baseptr; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 + * + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * cdef PyObject* baseptr + * if base is None: + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * arr.base = baseptr + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * if arr.base is NULL: + * return None + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + * + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: # <<<<<<<<<<<<<< + * return None + * else: + */ + __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); + if (__pyx_t_1) { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1002 + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: + * return None # <<<<<<<<<<<<<< + * else: + * return arr.base + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(Py_None); + __pyx_r = Py_None; + goto __pyx_L0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + * + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: # <<<<<<<<<<<<<< + * return None + * else: + */ + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1004 + * return None + * else: + * return arr.base # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_arr->base)); + __pyx_r = ((PyObject *)__pyx_v_arr->base); + goto __pyx_L0; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * arr.base = baseptr + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * if arr.base is NULL: + * return None + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * _import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + __Pyx_RefNannySetupContext("import_array", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * _import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1011 + * cdef inline int import_array() except -1: + * try: + * _import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1011, __pyx_L3_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * _import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1012 + * try: + * _import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1012, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 + * _import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1013, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1013, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1010 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * _import_array() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1009 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * _import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + __Pyx_RefNannySetupContext("import_umath", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1017 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1017, __pyx_L3_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1018 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1018, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1019, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1019, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1016 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1015 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + __Pyx_RefNannySetupContext("import_ufunc", 0); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1023 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1023, __pyx_L3_error) + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1024 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1024, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1025, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1025, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + __pyx_L5_except_error:; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1022 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_fasttext_inner(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_fasttext_inner}, + {0, NULL} +}; +#endif + +static struct PyModuleDef __pyx_moduledef = { + PyModuleDef_HEAD_INIT, + "fasttext_inner", + 0, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_FAST_VERSION, __pyx_k_FAST_VERSION, sizeof(__pyx_k_FAST_VERSION), 0, 0, 1, 1}, + {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, + {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_k_MAX_WORDS_IN_BATCH, sizeof(__pyx_k_MAX_WORDS_IN_BATCH), 0, 0, 1, 1}, + {&__pyx_kp_u_Non_native_byte_order_not_suppor, __pyx_k_Non_native_byte_order_not_suppor, sizeof(__pyx_k_Non_native_byte_order_not_suppor), 0, 1, 0, 0}, + {&__pyx_n_s_REAL, __pyx_k_REAL, sizeof(__pyx_k_REAL), 0, 0, 1, 1}, + {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, + {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, + {&__pyx_n_s__12, __pyx_k__12, sizeof(__pyx_k__12), 0, 0, 1, 1}, + {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, + {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, + {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, + {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, + {&__pyx_n_s_cum_table, __pyx_k_cum_table, sizeof(__pyx_k_cum_table), 0, 0, 1, 1}, + {&__pyx_n_s_cum_table_len, __pyx_k_cum_table_len, sizeof(__pyx_k_cum_table_len), 0, 0, 1, 1}, + {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, + {&__pyx_n_s_effective_sentences, __pyx_k_effective_sentences, sizeof(__pyx_k_effective_sentences), 0, 0, 1, 1}, + {&__pyx_n_s_effective_words, __pyx_k_effective_words, sizeof(__pyx_k_effective_words), 0, 0, 1, 1}, + {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, + {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, + {&__pyx_kp_s_fasttext_inner_pyx, __pyx_k_fasttext_inner_pyx, sizeof(__pyx_k_fasttext_inner_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, + {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, + {&__pyx_n_s_gensim, __pyx_k_gensim, sizeof(__pyx_k_gensim), 0, 0, 1, 1}, + {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, + {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, + {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, + {&__pyx_n_s_idx_end, __pyx_k_idx_end, sizeof(__pyx_k_idx_end), 0, 0, 1, 1}, + {&__pyx_n_s_idx_start, __pyx_k_idx_start, sizeof(__pyx_k_idx_start), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_index, __pyx_k_index, sizeof(__pyx_k_index), 0, 0, 1, 1}, + {&__pyx_n_s_index2word, __pyx_k_index2word, sizeof(__pyx_k_index2word), 0, 0, 1, 1}, + {&__pyx_n_s_indexes, __pyx_k_indexes, sizeof(__pyx_k_indexes), 0, 0, 1, 1}, + {&__pyx_n_s_init, __pyx_k_init, sizeof(__pyx_k_init), 0, 0, 1, 1}, + {&__pyx_n_s_item, __pyx_k_item, sizeof(__pyx_k_item), 0, 0, 1, 1}, + {&__pyx_n_s_j, __pyx_k_j, sizeof(__pyx_k_j), 0, 0, 1, 1}, + {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, + {&__pyx_n_s_l1, __pyx_k_l1, sizeof(__pyx_k_l1), 0, 0, 1, 1}, + {&__pyx_n_s_l1_2, __pyx_k_l1_2, sizeof(__pyx_k_l1_2), 0, 0, 1, 1}, + {&__pyx_n_s_layer1_size, __pyx_k_layer1_size, sizeof(__pyx_k_layer1_size), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_matutils, __pyx_k_matutils, sizeof(__pyx_k_matutils), 0, 0, 1, 1}, + {&__pyx_n_s_model, __pyx_k_model, sizeof(__pyx_k_model), 0, 0, 1, 1}, + {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, + {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, + {&__pyx_n_s_negative, __pyx_k_negative, sizeof(__pyx_k_negative), 0, 0, 1, 1}, + {&__pyx_n_s_next_random, __pyx_k_next_random, sizeof(__pyx_k_next_random), 0, 0, 1, 1}, + {&__pyx_n_s_ngrams, __pyx_k_ngrams, sizeof(__pyx_k_ngrams), 0, 0, 1, 1}, + {&__pyx_n_s_ngrams_word, __pyx_k_ngrams_word, sizeof(__pyx_k_ngrams_word), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_s_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 0, 1, 0}, + {&__pyx_kp_s_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 0, 1, 0}, + {&__pyx_n_s_p_res, __pyx_k_p_res, sizeof(__pyx_k_p_res), 0, 0, 1, 1}, + {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, + {&__pyx_n_s_points, __pyx_k_points, sizeof(__pyx_k_points), 0, 0, 1, 1}, + {&__pyx_n_s_randint, __pyx_k_randint, sizeof(__pyx_k_randint), 0, 0, 1, 1}, + {&__pyx_n_s_random, __pyx_k_random, sizeof(__pyx_k_random), 0, 0, 1, 1}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_reduced_windows, __pyx_k_reduced_windows, sizeof(__pyx_k_reduced_windows), 0, 0, 1, 1}, + {&__pyx_n_s_sample, __pyx_k_sample, sizeof(__pyx_k_sample), 0, 0, 1, 1}, + {&__pyx_n_s_sample_int, __pyx_k_sample_int, sizeof(__pyx_k_sample_int), 0, 0, 1, 1}, + {&__pyx_n_s_scipy_linalg_blas, __pyx_k_scipy_linalg_blas, sizeof(__pyx_k_scipy_linalg_blas), 0, 0, 1, 1}, + {&__pyx_n_s_sent, __pyx_k_sent, sizeof(__pyx_k_sent), 0, 0, 1, 1}, + {&__pyx_n_s_sent_idx, __pyx_k_sent_idx, sizeof(__pyx_k_sent_idx), 0, 0, 1, 1}, + {&__pyx_n_s_sentence_idx, __pyx_k_sentence_idx, sizeof(__pyx_k_sentence_idx), 0, 0, 1, 1}, + {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, + {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, + {&__pyx_n_s_subword, __pyx_k_subword, sizeof(__pyx_k_subword), 0, 0, 1, 1}, + {&__pyx_n_s_subwords_idx, __pyx_k_subwords_idx, sizeof(__pyx_k_subwords_idx), 0, 0, 1, 1}, + {&__pyx_n_s_subwords_idx_len, __pyx_k_subwords_idx_len, sizeof(__pyx_k_subwords_idx_len), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_ngrams, __pyx_k_syn0_ngrams, sizeof(__pyx_k_syn0_ngrams), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_ngrams_lockf, __pyx_k_syn0_ngrams_lockf, sizeof(__pyx_k_syn0_ngrams_lockf), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_vocab, __pyx_k_syn0_vocab, sizeof(__pyx_k_syn0_vocab), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_vocab_lockf, __pyx_k_syn0_vocab_lockf, sizeof(__pyx_k_syn0_vocab_lockf), 0, 0, 1, 1}, + {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, + {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, + {&__pyx_n_s_train_batch_sg, __pyx_k_train_batch_sg, sizeof(__pyx_k_train_batch_sg), 0, 0, 1, 1}, + {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, + {&__pyx_n_s_vlookup, __pyx_k_vlookup, sizeof(__pyx_k_vlookup), 0, 0, 1, 1}, + {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, + {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, + {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, + {&__pyx_n_s_word2vec, __pyx_k_word2vec, sizeof(__pyx_k_word2vec), 0, 0, 1, 1}, + {&__pyx_n_s_word_idx, __pyx_k_word_idx, sizeof(__pyx_k_word_idx), 0, 0, 1, 1}, + {&__pyx_n_s_word_locks_ngrams, __pyx_k_word_locks_ngrams, sizeof(__pyx_k_word_locks_ngrams), 0, 0, 1, 1}, + {&__pyx_n_s_word_locks_vocab, __pyx_k_word_locks_vocab, sizeof(__pyx_k_word_locks_vocab), 0, 0, 1, 1}, + {&__pyx_n_s_word_subwords, __pyx_k_word_subwords, sizeof(__pyx_k_word_subwords), 0, 0, 1, 1}, + {&__pyx_n_s_work, __pyx_k_work, sizeof(__pyx_k_work), 0, 0, 1, 1}, + {&__pyx_n_s_work_2, __pyx_k_work_2, sizeof(__pyx_k_work_2), 0, 0, 1, 1}, + {&__pyx_n_s_wv, __pyx_k_wv, sizeof(__pyx_k_wv), 0, 0, 1, 1}, + {&__pyx_n_s_x, __pyx_k_x, sizeof(__pyx_k_x), 0, 0, 1, 1}, + {&__pyx_n_s_y, __pyx_k_y, sizeof(__pyx_k_y), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 63, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) + __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +static int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "gensim/models/fasttext_inner.pyx":180 + * cum_table_len = len(model.cum_table) + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 180, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + */ + __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(1, 235, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< + * + * info.buf = PyArray_DATA(self) + */ + __pyx_tuple__4 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(1, 239, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + */ + __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(1, 276, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__5); + __Pyx_GIVEREF(__pyx_tuple__5); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + * + * if (end - f) - (new_offset - offset[0]) < 15: + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< + * + * if ((child.byteorder == c'>' and little_endian) or + */ + __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(1, 823, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 + * if ((child.byteorder == c'>' and little_endian) or + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * # One could encode it in the format string and have Cython + * # complain instead, BUT: < and > in format strings also imply + */ + __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + * t = child.type_num + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< + * + * # Until ticket #99 is fixed, use integers to avoid warnings + */ + __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(1, 847, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__8); + __Pyx_GIVEREF(__pyx_tuple__8); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 + * _import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 1013, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(1, 1019, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__10); + __Pyx_GIVEREF(__pyx_tuple__10); + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + */ + __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(1, 1025, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + + /* "gensim/models/fasttext_inner.pyx":130 + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_tuple__13 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_word_idx, __pyx_n_s_word_subwords, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(0, 130, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__13); + __Pyx_GIVEREF(__pyx_tuple__13); + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 46, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 130, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) __PYX_ERR(0, 130, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":251 + * + * + * def init(): # <<<<<<<<<<<<<< + * """ + * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized + */ + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 251, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__15); + __Pyx_GIVEREF(__pyx_tuple__15); + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 251, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 251, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_10000 = PyInt_FromLong(10000L); if (unlikely(!__pyx_int_10000)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_16777216 = PyInt_FromLong(16777216L); if (unlikely(!__pyx_int_16777216)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initfasttext_inner(void); /*proto*/ +PyMODINIT_FUNC initfasttext_inner(void) +#else +PyMODINIT_FUNC PyInit_fasttext_inner(void); /*proto*/ +PyMODINIT_FUNC PyInit_fasttext_inner(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name) { + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + result = PyDict_SetItemString(moddict, to_name, value); + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__") < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__") < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + int __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + PyObject *__pyx_t_10 = NULL; + PyObject *__pyx_t_11 = NULL; + __Pyx_RefNannyDeclarations + #if CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m && __pyx_m == __pyx_pyinit_module) return 0; + #endif + #if CYTHON_REFNANNY + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); + if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); + } + #endif + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_fasttext_inner(void)", 0); + if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + #ifdef WITH_THREAD /* Python build with threading support? */ + PyEval_InitThreads(); + #endif + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("fasttext_inner", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_COMPILING_IN_PYPY + Py_INCREF(__pyx_b); + #endif + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_gensim__models__fasttext_inner) { + if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "gensim.models.fasttext_inner")) { + if (unlikely(PyDict_SetItemString(modules, "gensim.models.fasttext_inner", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global init code ---*/ + /*--- Variable export code ---*/ + /*--- Function export code ---*/ + /*--- Type init code ---*/ + /*--- Type import code ---*/ + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", + #if CYTHON_COMPILING_IN_PYPY + sizeof(PyTypeObject), + #else + sizeof(PyHeapTypeObject), + #endif + 0); if (unlikely(!__pyx_ptype_7cpython_4type_type)) __PYX_ERR(2, 9, __pyx_L1_error) + __pyx_ptype_5numpy_dtype = __Pyx_ImportType("numpy", "dtype", sizeof(PyArray_Descr), 0); if (unlikely(!__pyx_ptype_5numpy_dtype)) __PYX_ERR(1, 163, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType("numpy", "flatiter", sizeof(PyArrayIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_flatiter)) __PYX_ERR(1, 185, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType("numpy", "broadcast", sizeof(PyArrayMultiIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_broadcast)) __PYX_ERR(1, 189, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject), 0); if (unlikely(!__pyx_ptype_5numpy_ndarray)) __PYX_ERR(1, 198, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType("numpy", "ufunc", sizeof(PyUFuncObject), 0); if (unlikely(!__pyx_ptype_5numpy_ufunc)) __PYX_ERR(1, 885, __pyx_L1_error) + /*--- Variable import code ---*/ + __pyx_t_1 = __Pyx_ImportModule("gensim.models.word2vec_inner"); if (!__pyx_t_1) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "scopy", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_scopy, "__pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "saxpy", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_saxpy, "__pyx_t_6gensim_6models_14word2vec_inner_saxpy_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "sdot", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_sdot, "__pyx_t_6gensim_6models_14word2vec_inner_sdot_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "dsdot", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_dsdot, "__pyx_t_6gensim_6models_14word2vec_inner_dsdot_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "snrm2", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_snrm2, "__pyx_t_6gensim_6models_14word2vec_inner_snrm2_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "sscal", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_sscal, "__pyx_t_6gensim_6models_14word2vec_inner_sscal_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "EXP_TABLE", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_EXP_TABLE, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t [0x3E8]") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "our_dot", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_our_dot, "__pyx_t_6gensim_6models_14word2vec_inner_our_dot_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportVoidPtr(__pyx_t_1, "our_saxpy", (void **)&__pyx_vp_6gensim_6models_14word2vec_inner_our_saxpy, "__pyx_t_6gensim_6models_14word2vec_inner_our_saxpy_ptr") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + Py_DECREF(__pyx_t_1); __pyx_t_1 = 0; + /*--- Function import code ---*/ + __pyx_t_2 = __Pyx_ImportModule("gensim.models.word2vec_inner"); if (!__pyx_t_2) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_double", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_double, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_float", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_float, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "our_saxpy_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas, "void (int const *, float const *, float const *, int const *, float *, int const *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "bisect_left", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_bisect_left, "unsigned PY_LONG_LONG (__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_ImportFunction(__pyx_t_2, "random_int32", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_random_int32, "unsigned PY_LONG_LONG (unsigned PY_LONG_LONG *)") < 0) __PYX_ERR(0, 1, __pyx_L1_error) + Py_DECREF(__pyx_t_2); __pyx_t_2 = 0; + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "gensim/models/fasttext_inner.pyx":5 + * + * import cython + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * + */ + __pyx_t_3 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 5, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_3) < 0) __PYX_ERR(0, 5, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":13 + * from libc.stdio cimport printf + * from libc.stdlib cimport calloc, free + * from gensim import matutils # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_INCREF(__pyx_n_s_matutils); + __Pyx_GIVEREF(__pyx_n_s_matutils); + PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_matutils); + __pyx_t_4 = __Pyx_Import(__pyx_n_s_gensim, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_matutils); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_matutils, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "gensim/models/fasttext_inner.pyx":17 + * + * # scipy <= 0.15 + * try: # <<<<<<<<<<<<<< + * from scipy.linalg.blas import fblas + * except ImportError: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7); + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + /*try:*/ { + + /* "gensim/models/fasttext_inner.pyx":18 + * # scipy <= 0.15 + * try: + * from scipy.linalg.blas import fblas # <<<<<<<<<<<<<< + * except ImportError: + * # in scipy > 0.15, fblas function has been removed + */ + __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L2_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_n_s_fblas); + __Pyx_GIVEREF(__pyx_n_s_fblas); + PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_fblas); + __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_4, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 18, __pyx_L2_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_fblas); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L2_error) + __Pyx_GOTREF(__pyx_t_4); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_4) < 0) __PYX_ERR(0, 18, __pyx_L2_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":17 + * + * # scipy <= 0.15 + * try: # <<<<<<<<<<<<<< + * from scipy.linalg.blas import fblas + * except ImportError: + */ + } + __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L7_try_end; + __pyx_L2_error:; + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":19 + * try: + * from scipy.linalg.blas import fblas + * except ImportError: # <<<<<<<<<<<<<< + * # in scipy > 0.15, fblas function has been removed + * import scipy.linalg.blas as fblas + */ + __pyx_t_8 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_ImportError); + if (__pyx_t_8) { + __Pyx_AddTraceback("gensim.models.fasttext_inner", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_4, &__pyx_t_9) < 0) __PYX_ERR(0, 19, __pyx_L4_except_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GOTREF(__pyx_t_9); + + /* "gensim/models/fasttext_inner.pyx":21 + * except ImportError: + * # in scipy > 0.15, fblas function has been removed + * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< + * + * from word2vec_inner cimport bisect_left, random_int32, \ + */ + __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 21, __pyx_L4_except_error) + __Pyx_GOTREF(__pyx_t_10); + __Pyx_INCREF(__pyx_n_s__12); + __Pyx_GIVEREF(__pyx_n_s__12); + PyList_SET_ITEM(__pyx_t_10, 0, __pyx_n_s__12); + __pyx_t_11 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_10, -1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 21, __pyx_L4_except_error) + __Pyx_GOTREF(__pyx_t_11); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_11) < 0) __PYX_ERR(0, 21, __pyx_L4_except_error) + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + goto __pyx_L3_exception_handled; + } + goto __pyx_L4_except_error; + __pyx_L4_except_error:; + + /* "gensim/models/fasttext_inner.pyx":17 + * + * # scipy <= 0.15 + * try: # <<<<<<<<<<<<<< + * from scipy.linalg.blas import fblas + * except ImportError: + */ + __Pyx_XGIVEREF(__pyx_t_5); + __Pyx_XGIVEREF(__pyx_t_6); + __Pyx_XGIVEREF(__pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + goto __pyx_L1_error; + __pyx_L3_exception_handled:; + __Pyx_XGIVEREF(__pyx_t_5); + __Pyx_XGIVEREF(__pyx_t_6); + __Pyx_XGIVEREF(__pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + __pyx_L7_try_end:; + } + + /* "gensim/models/fasttext_inner.pyx":29 + * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas + * + * REAL = np.float32 # <<<<<<<<<<<<<< + * + * DEF MAX_SENTENCE_LEN = 10000 + */ + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_4) < 0) __PYX_ERR(0, 29, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "gensim/models/fasttext_inner.pyx":33 + * DEF MAX_SENTENCE_LEN = 10000 + * DEF MAX_SUBWORDS = 1000 + * from word2vec import FAST_VERSION # <<<<<<<<<<<<<< + * + * DEF EXP_TABLE_SIZE = 1000 + */ + __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_INCREF(__pyx_n_s_FAST_VERSION); + __Pyx_GIVEREF(__pyx_n_s_FAST_VERSION); + PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_FAST_VERSION); + __pyx_t_9 = __Pyx_Import(__pyx_n_s_word2vec, __pyx_t_4, -1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_9, __pyx_n_s_FAST_VERSION); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_4) < 0) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "gensim/models/fasttext_inner.pyx":41 + * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE + * + * cdef int ONE = 1 # <<<<<<<<<<<<<< + * cdef REAL_t ONEF = 1.0 + * + */ + __pyx_v_6gensim_6models_14fasttext_inner_ONE = 1; + + /* "gensim/models/fasttext_inner.pyx":42 + * + * cdef int ONE = 1 + * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< + * + * cdef unsigned long long fast_sentence_sg_neg( + */ + __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); + + /* "gensim/models/fasttext_inner.pyx":130 + * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 130, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 130, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "gensim/models/fasttext_inner.pyx":251 + * + * + * def init(): # <<<<<<<<<<<<<< + * """ + * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized + */ + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 251, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 251, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "gensim/models/fasttext_inner.pyx":291 + * return 2 + * + * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< + * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN + */ + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 291, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_4, function); + } + } + if (__pyx_t_3) { + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 291, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } else { + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 291, __pyx_L1_error) + } + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 291, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "gensim/models/fasttext_inner.pyx":292 + * + * FAST_VERSION = init() # initialize the module + * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< + */ + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 292, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":1 + * #!/usr/bin/env cython # <<<<<<<<<<<<<< + * # coding: utf-8 + * + */ + __pyx_t_9 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_9) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_11); + if (__pyx_m) { + if (__pyx_d) { + __Pyx_AddTraceback("init gensim.models.fasttext_inner", 0, __pyx_lineno, __pyx_filename); + } + Py_DECREF(__pyx_m); __pyx_m = 0; + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init gensim.models.fasttext_inner"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule((char *)modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + +/* WriteUnraisableException */ +static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno, + CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename, + int full_traceback, CYTHON_UNUSED int nogil) { + PyObject *old_exc, *old_val, *old_tb; + PyObject *ctx; + __Pyx_PyThreadState_declare +#ifdef WITH_THREAD + PyGILState_STATE state; + if (nogil) + state = PyGILState_Ensure(); +#ifdef _MSC_VER + else state = (PyGILState_STATE)-1; +#endif +#endif + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&old_exc, &old_val, &old_tb); + if (full_traceback) { + Py_XINCREF(old_exc); + Py_XINCREF(old_val); + Py_XINCREF(old_tb); + __Pyx_ErrRestore(old_exc, old_val, old_tb); + PyErr_PrintEx(1); + } + #if PY_MAJOR_VERSION < 3 + ctx = PyString_FromString(name); + #else + ctx = PyUnicode_FromString(name); + #endif + __Pyx_ErrRestore(old_exc, old_val, old_tb); + if (!ctx) { + PyErr_WriteUnraisable(Py_None); + } else { + PyErr_WriteUnraisable(ctx); + Py_DECREF(ctx); + } +#ifdef WITH_THREAD + if (nogil) + PyGILState_Release(state); +#endif +} + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? "" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + +/* ExtTypeTest */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s", + Py_TYPE(obj)->tp_name, type->tp_name); + return 0; +} + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = func->ob_type->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyFunctionFastCall */ +#if CYTHON_FAST_PYCALL +#include "frameobject.h" +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = f->f_localsplus; + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +#if 1 || PY_VERSION_HEX < 0x030600B1 +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, int nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? PyDict_Size(kwargs) : 0; + if (Py_EnterRecursiveCall((char*)" while calling a Python object")) { + return NULL; + } + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif +#endif + +/* PyCFunctionFastCall */ +#if CYTHON_FAST_PYCCALL +static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) { + PyCFunctionObject *func = (PyCFunctionObject*)func_obj; + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); + int flags = PyCFunction_GET_FLAGS(func); + assert(PyCFunction_Check(func)); + assert(METH_FASTCALL == (flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS))); + assert(nargs >= 0); + assert(nargs == 0 || args != NULL); + /* _PyCFunction_FastCallDict() must not be called with an exception set, + because it may clear it (directly or indirectly) and so the + caller loses its exception */ + assert(!PyErr_Occurred()); + if ((PY_VERSION_HEX < 0x030700A0) || unlikely(flags & METH_KEYWORDS)) { + return (*((__Pyx_PyCFunctionFastWithKeywords)meth)) (self, args, nargs, NULL); + } else { + return (*((__Pyx_PyCFunctionFast)meth)) (self, args, nargs); + } +} +#endif + +/* GetItemInt */ +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (!j) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely((0 <= wrapped_i) & (wrapped_i < PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely((0 <= wrapped_i) & (wrapped_i < PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; + if (likely(m && m->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { + Py_ssize_t l = m->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return m->sq_item(o, i); + } + } +#else + if (is_list || PySequence_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* RaiseException */ +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, + CYTHON_UNUSED PyObject *cause) { + __Pyx_PyThreadState_declare + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* RaiseTooManyValuesToUnpack */ +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + PyErr_Format(PyExc_ValueError, + "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); +} + +/* RaiseNeedMoreValuesToUnpack */ +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + PyErr_Format(PyExc_ValueError, + "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", + index, (index == 1) ? "" : "s"); +} + +/* RaiseNoneIterError */ +static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); +} + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if PY_VERSION_HEX >= 0x030700A2 + *type = tstate->exc_state.exc_type; + *value = tstate->exc_state.exc_value; + *tb = tstate->exc_state.exc_traceback; + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + #endif + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if PY_VERSION_HEX >= 0x030700A2 + tmp_type = tstate->exc_state.exc_type; + tmp_value = tstate->exc_state.exc_value; + tmp_tb = tstate->exc_state.exc_traceback; + tstate->exc_state.exc_type = type; + tstate->exc_state.exc_value = value; + tstate->exc_state.exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; icurexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; + if (unlikely(PyTuple_Check(err))) + return __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + return __Pyx_PyErr_GivenExceptionMatches(exc_type, err); +} +#endif + +/* GetException */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) { +#endif + PyObject *local_type, *local_value, *local_tb; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if PY_VERSION_HEX >= 0x030700A2 + tmp_type = tstate->exc_state.exc_type; + tmp_value = tstate->exc_state.exc_value; + tmp_tb = tstate->exc_state.exc_traceback; + tstate->exc_state.exc_type = local_type; + tstate->exc_state.exc_value = local_value; + tstate->exc_state.exc_traceback = local_tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.')) { + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + +/* ImportFrom */ + static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); + if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Format(PyExc_ImportError, + #if PY_MAJOR_VERSION < 3 + "cannot import name %.230s", PyString_AS_STRING(name)); + #else + "cannot import name %S", name); + #endif + } + return value; +} + +/* GetModuleGlobalName */ + static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS + result = PyDict_GetItem(__pyx_d, name); + if (likely(result)) { + Py_INCREF(result); + } else { +#else + result = PyObject_GetItem(__pyx_d, name); + if (!result) { + PyErr_Clear(); +#endif + result = __Pyx_GetBuiltinName(name); + } + return result; +} + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, &arg, 1); + } +#endif + if (likely(PyCFunction_Check(func))) { + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); +#if CYTHON_FAST_PYCCALL + } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) { + return __Pyx_PyCFunction_FastCall(func, &arg, 1); +#endif + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + +/* PyObjectCallNoArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { +#if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCall(func, NULL, 0); + } +#endif +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || __Pyx_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) { + return __Pyx_PyObject_CallMethO(func, NULL); + } + } + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL); +} +#endif + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(CYTHON_UNUSED PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + use_cline = PyDict_GetItem(*cython_runtime_dict, __pyx_n_s_cline_in_traceback); + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (PyObject_Not(use_cline) != 0) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) goto bad; + __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_npy_uint32(npy_uint32 value) { + const npy_uint32 neg_one = (npy_uint32) -1, const_zero = (npy_uint32) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(npy_uint32) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(npy_uint32) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_uint32) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(npy_uint32) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_uint32) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(npy_uint32), + little, !is_unsigned); + } +} + +/* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); + } +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value) { + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = (unsigned PY_LONG_LONG) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned PY_LONG_LONG) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(unsigned PY_LONG_LONG), + little, !is_unsigned); + } +} + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + float r = b.imag / b.real; + float s = 1.0 / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = 1.0 / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if (b.imag == 0) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0, -1); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = 1.0 / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = 1.0 / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if (b.imag == 0) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0, -1); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value) { + const enum NPY_TYPES neg_one = (enum NPY_TYPES) -1, const_zero = (enum NPY_TYPES) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(enum NPY_TYPES) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(enum NPY_TYPES) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(enum NPY_TYPES) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(enum NPY_TYPES), + little, !is_unsigned); + } +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_As_PY_LONG_LONG(PyObject *x) { + const PY_LONG_LONG neg_one = (PY_LONG_LONG) -1, const_zero = (PY_LONG_LONG) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(PY_LONG_LONG) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (PY_LONG_LONG) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (PY_LONG_LONG) 0; + case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, digits[0]) + case 2: + if (8 * sizeof(PY_LONG_LONG) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) >= 2 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(PY_LONG_LONG) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) >= 3 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(PY_LONG_LONG) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) >= 4 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (PY_LONG_LONG) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(PY_LONG_LONG) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(PY_LONG_LONG) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (PY_LONG_LONG) 0; + case -1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, digit, +digits[0]) + case -2: + if (8 * sizeof(PY_LONG_LONG) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((PY_LONG_LONG)-1)*(((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(PY_LONG_LONG) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + return (PY_LONG_LONG) ((((((PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((PY_LONG_LONG)-1)*(((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(PY_LONG_LONG) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + return (PY_LONG_LONG) ((((((((PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + return (PY_LONG_LONG) (((PY_LONG_LONG)-1)*(((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(PY_LONG_LONG) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + return (PY_LONG_LONG) ((((((((((PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (PY_LONG_LONG)digits[0]))); + } + } + break; + } +#endif + if (sizeof(PY_LONG_LONG) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(PY_LONG_LONG) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + PY_LONG_LONG val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (PY_LONG_LONG) -1; + } + } else { + PY_LONG_LONG val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (PY_LONG_LONG) -1; + val = __Pyx_PyInt_As_PY_LONG_LONG(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to PY_LONG_LONG"); + return (PY_LONG_LONG) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to PY_LONG_LONG"); + return (PY_LONG_LONG) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG(PyObject *x) { + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = (unsigned PY_LONG_LONG) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(unsigned PY_LONG_LONG) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (unsigned PY_LONG_LONG) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (unsigned PY_LONG_LONG) 0; + case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, digits[0]) + case 2: + if (8 * sizeof(unsigned PY_LONG_LONG) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) >= 2 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(unsigned PY_LONG_LONG) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) >= 3 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(unsigned PY_LONG_LONG) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) >= 4 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (unsigned PY_LONG_LONG) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (unsigned PY_LONG_LONG) 0; + case -1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, digit, +digits[0]) + case -2: + if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((unsigned PY_LONG_LONG)-1)*(((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(unsigned PY_LONG_LONG) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) ((((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((unsigned PY_LONG_LONG)-1)*(((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(unsigned PY_LONG_LONG) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) ((((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) (((unsigned PY_LONG_LONG)-1)*(((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(unsigned PY_LONG_LONG) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(unsigned PY_LONG_LONG, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(unsigned PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + return (unsigned PY_LONG_LONG) ((((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]))); + } + } + break; + } +#endif + if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned PY_LONG_LONG, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + unsigned PY_LONG_LONG val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (unsigned PY_LONG_LONG) -1; + } + } else { + unsigned PY_LONG_LONG val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (unsigned PY_LONG_LONG) -1; + val = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to unsigned PY_LONG_LONG"); + return (unsigned PY_LONG_LONG) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned PY_LONG_LONG"); + return (unsigned PY_LONG_LONG) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { + const npy_uint32 neg_one = (npy_uint32) -1, const_zero = (npy_uint32) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(npy_uint32) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(npy_uint32, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (npy_uint32) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_uint32) 0; + case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, digits[0]) + case 2: + if (8 * sizeof(npy_uint32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) >= 2 * PyLong_SHIFT) { + return (npy_uint32) (((((npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(npy_uint32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) >= 3 * PyLong_SHIFT) { + return (npy_uint32) (((((((npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(npy_uint32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) >= 4 * PyLong_SHIFT) { + return (npy_uint32) (((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | (npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (npy_uint32) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(npy_uint32) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_uint32) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_uint32) 0; + case -1: __PYX_VERIFY_RETURN_INT(npy_uint32, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(npy_uint32, digit, +digits[0]) + case -2: + if (8 * sizeof(npy_uint32) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { + return (npy_uint32) (((npy_uint32)-1)*(((((npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(npy_uint32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { + return (npy_uint32) ((((((npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(npy_uint32) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { + return (npy_uint32) (((npy_uint32)-1)*(((((((npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(npy_uint32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { + return (npy_uint32) ((((((((npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(npy_uint32) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 4 * PyLong_SHIFT) { + return (npy_uint32) (((npy_uint32)-1)*(((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | (npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(npy_uint32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_uint32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_uint32) - 1 > 4 * PyLong_SHIFT) { + return (npy_uint32) ((((((((((npy_uint32)digits[3]) << PyLong_SHIFT) | (npy_uint32)digits[2]) << PyLong_SHIFT) | (npy_uint32)digits[1]) << PyLong_SHIFT) | (npy_uint32)digits[0]))); + } + } + break; + } +#endif + if (sizeof(npy_uint32) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if (sizeof(npy_uint32) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_uint32, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + npy_uint32 val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (npy_uint32) -1; + } + } else { + npy_uint32 val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (npy_uint32) -1; + val = __Pyx_PyInt_As_npy_uint32(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to npy_uint32"); + return (npy_uint32) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to npy_uint32"); + return (npy_uint32) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = a->tp_base; + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + int res = exc_type1 ? __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type1) : 0; + if (!res) { + res = __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } + return res; +} +#endif +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject* exc_type) { + if (likely(err == exc_type)) return 1; + if (likely(PyExceptionClass_Check(err))) { + return __Pyx_inner_PyErr_GivenExceptionMatches2(err, NULL, exc_type); + } + return PyErr_GivenExceptionMatches(err, exc_type); +} +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) { + if (likely(err == exc_type1 || err == exc_type2)) return 1; + if (likely(PyExceptionClass_Check(err))) { + return __Pyx_inner_PyErr_GivenExceptionMatches2(err, exc_type1, exc_type2); + } + return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2)); +} +#endif + +/* CheckBinaryVersion */ + static int __Pyx_check_binary_version(void) { + char ctversion[4], rtversion[4]; + PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); + PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); + if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + return PyErr_WarnEx(NULL, message, 1); + } + return 0; +} + +/* ModuleImport */ + #ifndef __PYX_HAVE_RT_ImportModule +#define __PYX_HAVE_RT_ImportModule +static PyObject *__Pyx_ImportModule(const char *name) { + PyObject *py_name = 0; + PyObject *py_module = 0; + py_name = __Pyx_PyIdentifier_FromString(name); + if (!py_name) + goto bad; + py_module = PyImport_Import(py_name); + Py_DECREF(py_name); + return py_module; +bad: + Py_XDECREF(py_name); + return 0; +} +#endif + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType +#define __PYX_HAVE_RT_ImportType +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, + size_t size, int strict) +{ + PyObject *py_module = 0; + PyObject *result = 0; + PyObject *py_name = 0; + char warning[200]; + Py_ssize_t basicsize; +#ifdef Py_LIMITED_API + PyObject *py_basicsize; +#endif + py_module = __Pyx_ImportModule(module_name); + if (!py_module) + goto bad; + py_name = __Pyx_PyIdentifier_FromString(class_name); + if (!py_name) + goto bad; + result = PyObject_GetAttr(py_module, py_name); + Py_DECREF(py_name); + py_name = 0; + Py_DECREF(py_module); + py_module = 0; + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#ifndef Py_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (!strict && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. Expected %zd, got %zd", + module_name, class_name, basicsize, size); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + else if ((size_t)basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s has the wrong size, try recompiling. Expected %zd, got %zd", + module_name, class_name, basicsize, size); + goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(py_module); + Py_XDECREF(result); + return NULL; +} +#endif + +/* VoidPtrImport */ + #ifndef __PYX_HAVE_RT_ImportVoidPtr +#define __PYX_HAVE_RT_ImportVoidPtr +static int __Pyx_ImportVoidPtr(PyObject *module, const char *name, void **p, const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); + if (!d) + goto bad; + cobj = PyDict_GetItemString(d, name); + if (!cobj) { + PyErr_Format(PyExc_ImportError, + "%.200s does not export expected C variable %.200s", + PyModule_GetName(module), name); + goto bad; + } +#if PY_VERSION_HEX >= 0x02070000 + if (!PyCapsule_IsValid(cobj, sig)) { + PyErr_Format(PyExc_TypeError, + "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), name, sig, PyCapsule_GetName(cobj)); + goto bad; + } + *p = PyCapsule_GetPointer(cobj, sig); +#else + {const char *desc, *s1, *s2; + desc = (const char *)PyCObject_GetDesc(cobj); + if (!desc) + goto bad; + s1 = desc; s2 = sig; + while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; } + if (*s1 != *s2) { + PyErr_Format(PyExc_TypeError, + "C variable %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), name, sig, desc); + goto bad; + } + *p = PyCObject_AsVoidPtr(cobj);} +#endif + if (!(*p)) + goto bad; + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(d); + return -1; +} +#endif + +/* FunctionImport */ + #ifndef __PYX_HAVE_RT_ImportFunction +#define __PYX_HAVE_RT_ImportFunction +static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + union { + void (*fp)(void); + void *p; + } tmp; + d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); + if (!d) + goto bad; + cobj = PyDict_GetItemString(d, funcname); + if (!cobj) { + PyErr_Format(PyExc_ImportError, + "%.200s does not export expected C function %.200s", + PyModule_GetName(module), funcname); + goto bad; + } +#if PY_VERSION_HEX >= 0x02070000 + if (!PyCapsule_IsValid(cobj, sig)) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj)); + goto bad; + } + tmp.p = PyCapsule_GetPointer(cobj, sig); +#else + {const char *desc, *s1, *s2; + desc = (const char *)PyCObject_GetDesc(cobj); + if (!desc) + goto bad; + s1 = desc; s2 = sig; + while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; } + if (*s1 != *s2) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, desc); + goto bad; + } + tmp.p = PyCObject_AsVoidPtr(cobj);} +#endif + *f = tmp.fp; + if (!(*f)) + goto bad; + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(d); + return -1; +} +#endif + +/* InitStrings */ + static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + PyErr_Clear(); + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type %.200s). " + "The ability to return an instance of a strict subclass of int " + "is deprecated, and may be removed in a future version of Python.", + Py_TYPE(result)->tp_name)) { + Py_DECREF(result); + return NULL; + } + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type %.200s)", + type_name, type_name, Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(x); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)b)->ob_digit; + const Py_ssize_t size = Py_SIZE(b); + if (likely(__Pyx_sst_abs(size) <= 1)) { + ival = likely(size) ? digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ From 838e8334b71363d6d61c2ad71521418514874863 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 27 Nov 2017 22:06:42 +0530 Subject: [PATCH 06/22] resolves segmentation fault with multiple workers --- gensim/models/fasttext_inner.c | 1053 ++++++++++++++---------------- gensim/models/fasttext_inner.pyx | 23 +- 2 files changed, 499 insertions(+), 577 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 9f90c6296d..c80ea35789 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1253,6 +1253,26 @@ static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* s return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); } +/* ListCompAppend.proto */ +#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS +static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { + PyListObject* L = (PyListObject*) list; + Py_ssize_t len = Py_SIZE(list); + if (likely(L->allocated > len)) { + Py_INCREF(x); + PyList_SET_ITEM(list, len, x); + Py_SIZE(list) = len+1; + return 0; + } + return PyList_Append(list, x); +} +#else +#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) +#endif + +/* GetModuleGlobalName.proto */ +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); + /* PyFunctionFastCall.proto */ #if CYTHON_FAST_PYCALL #define __Pyx_PyFunction_FastCall(func, args, nargs)\ @@ -1271,28 +1291,6 @@ static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObje #define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) #endif -/* GetItemInt.proto */ -#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ - (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ - __Pyx_GetItemInt_Generic(o, to_py_func(i)))) -#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, - int is_list, int wraparound, int boundscheck); - /* RaiseException.proto */ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); @@ -1359,9 +1357,6 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /* ImportFrom.proto */ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); -/* GetModuleGlobalName.proto */ -static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); - /* PyObjectCallMethO.proto */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); @@ -1636,8 +1631,8 @@ static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_random_i static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[0x3E8]; static int __pyx_v_6gensim_6models_14fasttext_inner_ONE; static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14fasttext_inner_ONEF; -static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ #define __Pyx_MODULE_NAME "gensim.models.fasttext_inner" extern int __pyx_module_is_main_gensim__models__fasttext_inner; int __pyx_module_is_main_gensim__models__fasttext_inner = 0; @@ -1671,8 +1666,10 @@ static const char __pyx_k_test[] = "__test__"; static const char __pyx_k_word[] = "word"; static const char __pyx_k_work[] = "_work"; static const char __pyx_k_alpha[] = "alpha"; +static const char __pyx_k_array[] = "array"; static const char __pyx_k_codes[] = "codes"; static const char __pyx_k_d_res[] = "d_res"; +static const char __pyx_k_dtype[] = "dtype"; static const char __pyx_k_fblas[] = "fblas"; static const char __pyx_k_index[] = "index"; static const char __pyx_k_model[] = "model"; @@ -1688,6 +1685,7 @@ static const char __pyx_k_ngrams[] = "ngrams"; static const char __pyx_k_points[] = "points"; static const char __pyx_k_random[] = "random"; static const char __pyx_k_sample[] = "sample"; +static const char __pyx_k_uint32[] = "uint32"; static const char __pyx_k_window[] = "window"; static const char __pyx_k_work_2[] = "work"; static const char __pyx_k_alpha_2[] = "_alpha"; @@ -1695,7 +1693,6 @@ static const char __pyx_k_float32[] = "float32"; static const char __pyx_k_idx_end[] = "idx_end"; static const char __pyx_k_indexes[] = "indexes"; static const char __pyx_k_randint[] = "randint"; -static const char __pyx_k_subword[] = "subword"; static const char __pyx_k_syn1neg[] = "syn1neg"; static const char __pyx_k_vlookup[] = "vlookup"; static const char __pyx_k_codelens[] = "codelens"; @@ -1703,12 +1700,13 @@ static const char __pyx_k_expected[] = "expected"; static const char __pyx_k_matutils[] = "matutils"; static const char __pyx_k_negative[] = "negative"; static const char __pyx_k_sent_idx[] = "sent_idx"; +static const char __pyx_k_subwords[] = "subwords"; static const char __pyx_k_word2vec[] = "word2vec"; -static const char __pyx_k_word_idx[] = "word_idx"; static const char __pyx_k_cum_table[] = "cum_table"; static const char __pyx_k_enumerate[] = "enumerate"; static const char __pyx_k_idx_start[] = "idx_start"; static const char __pyx_k_sentences[] = "sentences"; +static const char __pyx_k_subword_i[] = "subword_i"; static const char __pyx_k_ValueError[] = "ValueError"; static const char __pyx_k_index2word[] = "index2word"; static const char __pyx_k_sample_int[] = "sample_int"; @@ -1758,6 +1756,7 @@ static PyObject *__pyx_n_s_ValueError; static PyObject *__pyx_n_s__12; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; +static PyObject *__pyx_n_s_array; static PyObject *__pyx_n_s_cline_in_traceback; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; @@ -1765,6 +1764,7 @@ static PyObject *__pyx_n_s_codes; static PyObject *__pyx_n_s_cum_table; static PyObject *__pyx_n_s_cum_table_len; static PyObject *__pyx_n_s_d_res; +static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_effective_sentences; static PyObject *__pyx_n_s_effective_words; static PyObject *__pyx_n_s_enumerate; @@ -1817,7 +1817,8 @@ static PyObject *__pyx_n_s_sent_idx; static PyObject *__pyx_n_s_sentence_idx; static PyObject *__pyx_n_s_sentences; static PyObject *__pyx_n_s_size; -static PyObject *__pyx_n_s_subword; +static PyObject *__pyx_n_s_subword_i; +static PyObject *__pyx_n_s_subwords; static PyObject *__pyx_n_s_subwords_idx; static PyObject *__pyx_n_s_subwords_idx_len; static PyObject *__pyx_n_s_syn0_ngrams; @@ -1829,13 +1830,13 @@ static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_test; static PyObject *__pyx_n_s_token; static PyObject *__pyx_n_s_train_batch_sg; +static PyObject *__pyx_n_s_uint32; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; static PyObject *__pyx_n_s_vlookup; static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_word; static PyObject *__pyx_n_s_word2vec; -static PyObject *__pyx_n_s_word_idx; static PyObject *__pyx_n_s_word_locks_ngrams; static PyObject *__pyx_n_s_word_locks_vocab; static PyObject *__pyx_n_s_word_subwords; @@ -1877,7 +1878,7 @@ static PyObject *__pyx_codeobj__16; * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { __pyx_t_5numpy_uint32_t __pyx_v_word2_index; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -2285,7 +2286,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, */ -static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_subwords_index, __pyx_t_5numpy_uint32_t const __pyx_v_subwords_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { PY_LONG_LONG __pyx_v_b; __pyx_t_5numpy_uint32_t __pyx_v_word2_index; PY_LONG_LONG __pyx_v_row1; @@ -2689,16 +2690,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - __pyx_t_5numpy_uint32_t __pyx_v_subwords_idx_len[0x2710]; - __pyx_t_5numpy_uint32_t __pyx_v_subwords_idx[0x2710][0x3E8]; + int __pyx_v_subwords_idx_len[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_subwords_idx[0x2710]; PyObject *__pyx_v_vlookup = NULL; PyObject *__pyx_v_sent = NULL; PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_word = NULL; - PyObject *__pyx_v_item = NULL; - __pyx_t_5numpy_uint32_t __pyx_v_word_idx; + PyObject *__pyx_v_subwords = NULL; PyObject *__pyx_v_word_subwords = NULL; - PyObject *__pyx_v_subword = NULL; + PyObject *__pyx_v_item = NULL; + PyObject *__pyx_v_subword_i = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -2716,13 +2717,15 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject *__pyx_t_13 = NULL; PyObject *__pyx_t_14 = NULL; __pyx_t_5numpy_uint32_t __pyx_t_15; - Py_ssize_t __pyx_t_16; - int __pyx_t_17; - PyObject *__pyx_t_18 = NULL; - int __pyx_t_19; + PyObject *__pyx_t_16 = NULL; + PyObject *__pyx_t_17 = NULL; + Py_ssize_t __pyx_t_18; + PyObject *(*__pyx_t_19)(PyObject *); int __pyx_t_20; int __pyx_t_21; int __pyx_t_22; + int __pyx_t_23; + int __pyx_t_24; __Pyx_RefNannySetupContext("train_batch_sg", 0); /* "gensim/models/fasttext_inner.pyx":131 @@ -2870,8 +2873,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":172 - * cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + /* "gensim/models/fasttext_inner.pyx":173 + * * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -2880,21 +2883,21 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":173 + /* "gensim/models/fasttext_inner.pyx":174 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 173, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 173, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 174, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":172 - * cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + /* "gensim/models/fasttext_inner.pyx":173 + * * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -2902,7 +2905,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":175 + /* "gensim/models/fasttext_inner.pyx":176 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -2912,46 +2915,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":177 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 176, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 176, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 177, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":177 + /* "gensim/models/fasttext_inner.pyx":178 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 178, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 177, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 178, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":179 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 178, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 179, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 178, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 179, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":175 + /* "gensim/models/fasttext_inner.pyx":176 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -2960,7 +2963,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":179 + /* "gensim/models/fasttext_inner.pyx":180 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -2978,41 +2981,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":181 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":179 + /* "gensim/models/fasttext_inner.pyx":180 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3021,42 +3024,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":183 + /* "gensim/models/fasttext_inner.pyx":184 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * l1 = np.PyArray_DATA(_l1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 183, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 184, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":185 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 184, __pyx_L1_error) + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 185, __pyx_L1_error) __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":188 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 187, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 187, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":189 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -3065,7 +3068,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":189 + /* "gensim/models/fasttext_inner.pyx":190 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -3076,26 +3079,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 190, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 189, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 190, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -3105,7 +3108,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 189, __pyx_L1_error) + else __PYX_ERR(0, 190, __pyx_L1_error) } break; } @@ -3114,18 +3117,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":191 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 191, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":191 + /* "gensim/models/fasttext_inner.pyx":192 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -3134,7 +3137,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":191 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -3143,7 +3146,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":193 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -3154,26 +3157,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 193, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 193, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -3183,7 +3186,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 192, __pyx_L1_error) + else __PYX_ERR(0, 193, __pyx_L1_error) } break; } @@ -3192,16 +3195,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":193 + /* "gensim/models/fasttext_inner.pyx":194 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 194, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 194, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -3212,7 +3215,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":195 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -3223,7 +3226,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":195 + /* "gensim/models/fasttext_inner.pyx":196 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -3232,7 +3235,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":195 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -3241,7 +3244,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":196 + /* "gensim/models/fasttext_inner.pyx":197 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -3254,29 +3257,29 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 196, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 196, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 196, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 196, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":198 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< * indexes[effective_words] = word.index - * if hs: + * */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":196 + /* "gensim/models/fasttext_inner.pyx":197 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -3285,22 +3288,172 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":198 + /* "gensim/models/fasttext_inner.pyx":199 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[effective_words] = len(word.code) + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 198, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 199, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 198, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 199, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":199 - * continue + /* "gensim/models/fasttext_inner.pyx":201 * indexes[effective_words] = word.index + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + */ + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { + __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; + __pyx_t_19 = NULL; + } else { + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 201, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + for (;;) { + if (likely(!__pyx_t_19)) { + if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 201, __pyx_L1_error) + #else + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + #endif + } else { + if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 201, __pyx_L1_error) + #else + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + #endif + } + } else { + __pyx_t_13 = __pyx_t_19(__pyx_t_17); + if (unlikely(!__pyx_t_13)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 201, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_13); + } + __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); + __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 201, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + } + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); + __pyx_t_14 = 0; + + /* "gensim/models/fasttext_inner.pyx":202 + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< + * subwords_idx_len[effective_words] = (len(subwords) + 1) + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + */ + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_14); + PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + __pyx_t_14 = 0; + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_14); + PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 202, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); + __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":203 + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * + */ + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 203, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); + + /* "gensim/models/fasttext_inner.pyx":204 + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< + * + * if hs: + */ + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 204, __pyx_L1_error) + (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); + + /* "gensim/models/fasttext_inner.pyx":206 + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * * if hs: # <<<<<<<<<<<<<< * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) @@ -3308,55 +3461,55 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":200 - * indexes[effective_words] = word.index + /* "gensim/models/fasttext_inner.pyx":207 + * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 200, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_16 = PyObject_Length(__pyx_t_14); if (unlikely(__pyx_t_16 == ((Py_ssize_t)-1))) __PYX_ERR(0, 200, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_16); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 207, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 207, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":201 + /* "gensim/models/fasttext_inner.pyx":208 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 201, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 201, __pyx_L1_error) - (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_14))); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 208, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 208, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":202 + /* "gensim/models/fasttext_inner.pyx":209 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - if (!(likely(((__pyx_t_14) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_14, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 202, __pyx_L1_error) - (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_14))); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 209, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 209, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":199 - * continue - * indexes[effective_words] = word.index + /* "gensim/models/fasttext_inner.pyx":206 + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * * if hs: # <<<<<<<<<<<<<< * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) */ } - /* "gensim/models/fasttext_inner.pyx":204 + /* "gensim/models/fasttext_inner.pyx":211 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< @@ -3365,7 +3518,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":205 + /* "gensim/models/fasttext_inner.pyx":212 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -3375,7 +3528,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":206 + /* "gensim/models/fasttext_inner.pyx":213 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< @@ -3384,7 +3537,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":205 + /* "gensim/models/fasttext_inner.pyx":212 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -3393,7 +3546,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":193 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -3405,7 +3558,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":211 + /* "gensim/models/fasttext_inner.pyx":218 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_16 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 225, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 225, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_14))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_14)) break; + if (likely(PyList_CheckExact(__pyx_t_16))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 225, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_14, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 225, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_14)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 225, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_14, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 225, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } } else { - __pyx_t_3 = __pyx_t_10(__pyx_t_14); + __pyx_t_3 = __pyx_t_10(__pyx_t_16); if (unlikely(!__pyx_t_3)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 218, __pyx_L1_error) + else __PYX_ERR(0, 225, __pyx_L1_error) } break; } @@ -3577,17 +3730,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":226 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * for i in range(effective_words): + * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 219, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 226, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":218 + /* "gensim/models/fasttext_inner.pyx":225 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -3595,159 +3748,11 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ } - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":221 + /* "gensim/models/fasttext_inner.pyx":228 * reduced_windows[i] = item * - * for i in range(effective_words): # <<<<<<<<<<<<<< - * word_idx = indexes[i] - * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] - */ - __pyx_t_2 = __pyx_v_effective_words; - for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { - __pyx_v_i = __pyx_t_17; - - /* "gensim/models/fasttext_inner.pyx":222 - * - * for i in range(effective_words): - * word_idx = indexes[i] # <<<<<<<<<<<<<< - * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] - * subwords_idx_len[i] = len(word_subwords) + 1 - */ - __pyx_v_word_idx = (__pyx_v_indexes[__pyx_v_i]); - - /* "gensim/models/fasttext_inner.pyx":223 - * for i in range(effective_words): - * word_idx = indexes[i] - * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] # <<<<<<<<<<<<<< - * subwords_idx_len[i] = len(word_subwords) + 1 - * subwords_idx[i][0] = word_idx - */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_index2word); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_GetItemInt(__pyx_t_18, __pyx_v_word_idx, __pyx_t_5numpy_uint32_t, 0, __Pyx_PyInt_From_npy_uint32, 0, 0, 1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - __pyx_t_18 = PyObject_GetItem(__pyx_t_3, __pyx_t_14); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 223, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_18); - __pyx_t_18 = 0; - - /* "gensim/models/fasttext_inner.pyx":224 - * word_idx = indexes[i] - * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] - * subwords_idx_len[i] = len(word_subwords) + 1 # <<<<<<<<<<<<<< - * subwords_idx[i][0] = word_idx - * for j, subword in enumerate(word_subwords): - */ - __pyx_t_6 = PyObject_Length(__pyx_v_word_subwords); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 224, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_i]) = (__pyx_t_6 + 1); - - /* "gensim/models/fasttext_inner.pyx":225 - * word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] - * subwords_idx_len[i] = len(word_subwords) + 1 - * subwords_idx[i][0] = word_idx # <<<<<<<<<<<<<< - * for j, subword in enumerate(word_subwords): - * subwords_idx[i][j+1] = model.wv.ngrams[subword] - */ - ((__pyx_v_subwords_idx[__pyx_v_i])[0]) = __pyx_v_word_idx; - - /* "gensim/models/fasttext_inner.pyx":226 - * subwords_idx_len[i] = len(word_subwords) + 1 - * subwords_idx[i][0] = word_idx - * for j, subword in enumerate(word_subwords): # <<<<<<<<<<<<<< - * subwords_idx[i][j+1] = model.wv.ngrams[subword] - * - */ - __pyx_t_19 = 0; - if (likely(PyList_CheckExact(__pyx_v_word_subwords)) || PyTuple_CheckExact(__pyx_v_word_subwords)) { - __pyx_t_18 = __pyx_v_word_subwords; __Pyx_INCREF(__pyx_t_18); __pyx_t_6 = 0; - __pyx_t_10 = NULL; - } else { - __pyx_t_6 = -1; __pyx_t_18 = PyObject_GetIter(__pyx_v_word_subwords); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_10 = Py_TYPE(__pyx_t_18)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 226, __pyx_L1_error) - } - for (;;) { - if (likely(!__pyx_t_10)) { - if (likely(PyList_CheckExact(__pyx_t_18))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_18)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_14 = PyList_GET_ITEM(__pyx_t_18, __pyx_t_6); __Pyx_INCREF(__pyx_t_14); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 226, __pyx_L1_error) - #else - __pyx_t_14 = PySequence_ITEM(__pyx_t_18, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - #endif - } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_18)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_14 = PyTuple_GET_ITEM(__pyx_t_18, __pyx_t_6); __Pyx_INCREF(__pyx_t_14); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 226, __pyx_L1_error) - #else - __pyx_t_14 = PySequence_ITEM(__pyx_t_18, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - #endif - } - } else { - __pyx_t_14 = __pyx_t_10(__pyx_t_18); - if (unlikely(!__pyx_t_14)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 226, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_14); - } - __Pyx_XDECREF_SET(__pyx_v_subword, __pyx_t_14); - __pyx_t_14 = 0; - __pyx_v_j = __pyx_t_19; - __pyx_t_19 = (__pyx_t_19 + 1); - - /* "gensim/models/fasttext_inner.pyx":227 - * subwords_idx[i][0] = word_idx - * for j, subword in enumerate(word_subwords): - * subwords_idx[i][j+1] = model.wv.ngrams[subword] # <<<<<<<<<<<<<< - * - * with nogil: - */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyObject_GetItem(__pyx_t_3, __pyx_v_subword); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 227, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - ((__pyx_v_subwords_idx[__pyx_v_i])[(__pyx_v_j + 1)]) = __pyx_t_15; - - /* "gensim/models/fasttext_inner.pyx":226 - * subwords_idx_len[i] = len(word_subwords) + 1 - * subwords_idx[i][0] = word_idx - * for j, subword in enumerate(word_subwords): # <<<<<<<<<<<<<< - * subwords_idx[i][j+1] = model.wv.ngrams[subword] - * - */ - } - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; - } - - /* "gensim/models/fasttext_inner.pyx":229 - * subwords_idx[i][j+1] = model.wv.ngrams[subword] - * * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] @@ -3760,7 +3765,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":230 + /* "gensim/models/fasttext_inner.pyx":229 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -3768,10 +3773,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * idx_end = sentence_idx[sent_idx + 1] */ __pyx_t_2 = __pyx_v_effective_sentences; - for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { - __pyx_v_sent_idx = __pyx_t_17; + for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { + __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":231 + /* "gensim/models/fasttext_inner.pyx":230 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -3780,7 +3785,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":232 + /* "gensim/models/fasttext_inner.pyx":231 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -3789,18 +3794,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":233 + /* "gensim/models/fasttext_inner.pyx":232 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < idx_start: */ - __pyx_t_19 = __pyx_v_idx_end; - for (__pyx_t_20 = __pyx_v_idx_start; __pyx_t_20 < __pyx_t_19; __pyx_t_20+=1) { - __pyx_v_i = __pyx_t_20; + __pyx_t_21 = __pyx_v_idx_end; + for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { + __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":233 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3809,7 +3814,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":235 + /* "gensim/models/fasttext_inner.pyx":234 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -3819,7 +3824,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":235 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -3828,7 +3833,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":235 + /* "gensim/models/fasttext_inner.pyx":234 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -3837,7 +3842,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":236 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3846,7 +3851,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":237 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -3856,7 +3861,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":239 + /* "gensim/models/fasttext_inner.pyx":238 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -3865,7 +3870,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":237 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -3874,18 +3879,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":239 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< * if j == i: * continue */ - __pyx_t_21 = __pyx_v_k; - for (__pyx_t_22 = __pyx_v_j; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { - __pyx_v_j = __pyx_t_22; + __pyx_t_23 = __pyx_v_k; + for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { + __pyx_v_j = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":241 + /* "gensim/models/fasttext_inner.pyx":240 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -3895,16 +3900,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":242 + /* "gensim/models/fasttext_inner.pyx":241 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) */ - goto __pyx_L35_continue; + goto __pyx_L33_continue; - /* "gensim/models/fasttext_inner.pyx":241 + /* "gensim/models/fasttext_inner.pyx":240 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -3913,7 +3918,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":243 + /* "gensim/models/fasttext_inner.pyx":242 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -3923,7 +3928,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":244 + /* "gensim/models/fasttext_inner.pyx":243 * continue * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< @@ -3932,7 +3937,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":243 + /* "gensim/models/fasttext_inner.pyx":242 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -3941,7 +3946,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":245 + /* "gensim/models/fasttext_inner.pyx":244 * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -3951,7 +3956,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":246 + /* "gensim/models/fasttext_inner.pyx":245 * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< @@ -3960,7 +3965,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":245 + /* "gensim/models/fasttext_inner.pyx":244 * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -3968,14 +3973,14 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ } - __pyx_L35_continue:; + __pyx_L33_continue:; } } } } - /* "gensim/models/fasttext_inner.pyx":229 - * subwords_idx[i][j+1] = model.wv.ngrams[subword] + /* "gensim/models/fasttext_inner.pyx":228 + * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(effective_sentences): @@ -3987,13 +3992,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_FastGIL_Forget(); Py_BLOCK_THREADS #endif - goto __pyx_L28; + goto __pyx_L26; } - __pyx_L28:; + __pyx_L26:; } } - /* "gensim/models/fasttext_inner.pyx":248 + /* "gensim/models/fasttext_inner.pyx":247 * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -4001,10 +4006,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_18 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_18)) __PYX_ERR(0, 248, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_18); - __pyx_r = __pyx_t_18; - __pyx_t_18 = 0; + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_r = __pyx_t_16; + __pyx_t_16 = 0; goto __pyx_L0; /* "gensim/models/fasttext_inner.pyx":130 @@ -4022,7 +4027,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_13); __Pyx_XDECREF(__pyx_t_14); - __Pyx_XDECREF(__pyx_t_18); + __Pyx_XDECREF(__pyx_t_16); + __Pyx_XDECREF(__pyx_t_17); __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -4030,15 +4036,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_v_sent); __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_word); - __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_subwords); __Pyx_XDECREF(__pyx_v_word_subwords); - __Pyx_XDECREF(__pyx_v_subword); + __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_subword_i); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":251 +/* "gensim/models/fasttext_inner.pyx":250 * * * def init(): # <<<<<<<<<<<<<< @@ -4078,7 +4085,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P int __pyx_t_5; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":260 + /* "gensim/models/fasttext_inner.pyx":259 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -4088,7 +4095,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":261 + /* "gensim/models/fasttext_inner.pyx":260 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -4098,7 +4105,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":262 + /* "gensim/models/fasttext_inner.pyx":261 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -4107,7 +4114,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":263 + /* "gensim/models/fasttext_inner.pyx":262 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -4116,7 +4123,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":268 + /* "gensim/models/fasttext_inner.pyx":267 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -4126,7 +4133,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":269 + /* "gensim/models/fasttext_inner.pyx":268 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -4135,7 +4142,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":270 + /* "gensim/models/fasttext_inner.pyx":269 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -4145,11 +4152,11 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_4 = ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0); if (unlikely(__pyx_t_4 == 0)) { PyErr_SetString(PyExc_ZeroDivisionError, "float division"); - __PYX_ERR(0, 270, __pyx_L1_error) + __PYX_ERR(0, 269, __pyx_L1_error) } (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / __pyx_t_4)); - /* "gensim/models/fasttext_inner.pyx":271 + /* "gensim/models/fasttext_inner.pyx":270 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -4159,7 +4166,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":274 + /* "gensim/models/fasttext_inner.pyx":273 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -4168,7 +4175,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":275 + /* "gensim/models/fasttext_inner.pyx":274 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -4177,7 +4184,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":276 + /* "gensim/models/fasttext_inner.pyx":275 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4187,7 +4194,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_5 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":277 + /* "gensim/models/fasttext_inner.pyx":276 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -4196,7 +4203,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":278 + /* "gensim/models/fasttext_inner.pyx":277 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4205,7 +4212,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":279 + /* "gensim/models/fasttext_inner.pyx":278 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -4217,7 +4224,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":276 + /* "gensim/models/fasttext_inner.pyx":275 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4226,7 +4233,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":280 + /* "gensim/models/fasttext_inner.pyx":279 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4236,7 +4243,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_5 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":281 + /* "gensim/models/fasttext_inner.pyx":280 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -4245,7 +4252,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":282 + /* "gensim/models/fasttext_inner.pyx":281 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4254,7 +4261,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":283 + /* "gensim/models/fasttext_inner.pyx":282 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -4266,7 +4273,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":280 + /* "gensim/models/fasttext_inner.pyx":279 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4275,7 +4282,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":287 + /* "gensim/models/fasttext_inner.pyx":286 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -4285,7 +4292,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":288 + /* "gensim/models/fasttext_inner.pyx":287 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -4294,7 +4301,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":289 + /* "gensim/models/fasttext_inner.pyx":288 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -4307,7 +4314,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":251 + /* "gensim/models/fasttext_inner.pyx":250 * * * def init(): # <<<<<<<<<<<<<< @@ -6956,6 +6963,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s__12, __pyx_k__12, sizeof(__pyx_k__12), 0, 0, 1, 1}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, + {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, @@ -6963,6 +6971,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_cum_table, __pyx_k_cum_table, sizeof(__pyx_k_cum_table), 0, 0, 1, 1}, {&__pyx_n_s_cum_table_len, __pyx_k_cum_table_len, sizeof(__pyx_k_cum_table_len), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_effective_sentences, __pyx_k_effective_sentences, sizeof(__pyx_k_effective_sentences), 0, 0, 1, 1}, {&__pyx_n_s_effective_words, __pyx_k_effective_words, sizeof(__pyx_k_effective_words), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, @@ -7015,7 +7024,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_sentence_idx, __pyx_k_sentence_idx, sizeof(__pyx_k_sentence_idx), 0, 0, 1, 1}, {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, - {&__pyx_n_s_subword, __pyx_k_subword, sizeof(__pyx_k_subword), 0, 0, 1, 1}, + {&__pyx_n_s_subword_i, __pyx_k_subword_i, sizeof(__pyx_k_subword_i), 0, 0, 1, 1}, + {&__pyx_n_s_subwords, __pyx_k_subwords, sizeof(__pyx_k_subwords), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx, __pyx_k_subwords_idx, sizeof(__pyx_k_subwords_idx), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx_len, __pyx_k_subwords_idx_len, sizeof(__pyx_k_subwords_idx_len), 0, 0, 1, 1}, {&__pyx_n_s_syn0_ngrams, __pyx_k_syn0_ngrams, sizeof(__pyx_k_syn0_ngrams), 0, 0, 1, 1}, @@ -7027,13 +7037,13 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, {&__pyx_n_s_train_batch_sg, __pyx_k_train_batch_sg, sizeof(__pyx_k_train_batch_sg), 0, 0, 1, 1}, + {&__pyx_n_s_uint32, __pyx_k_uint32, sizeof(__pyx_k_uint32), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, {&__pyx_n_s_vlookup, __pyx_k_vlookup, sizeof(__pyx_k_vlookup), 0, 0, 1, 1}, {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {&__pyx_n_s_word2vec, __pyx_k_word2vec, sizeof(__pyx_k_word2vec), 0, 0, 1, 1}, - {&__pyx_n_s_word_idx, __pyx_k_word_idx, sizeof(__pyx_k_word_idx), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_ngrams, __pyx_k_word_locks_ngrams, sizeof(__pyx_k_word_locks_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_vocab, __pyx_k_word_locks_vocab, sizeof(__pyx_k_word_locks_vocab), 0, 0, 1, 1}, {&__pyx_n_s_word_subwords, __pyx_k_word_subwords, sizeof(__pyx_k_word_subwords), 0, 0, 1, 1}, @@ -7047,7 +7057,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 19, __pyx_L1_error) __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 63, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 225, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) return 0; @@ -7059,17 +7069,17 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":181 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 180, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 181, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); @@ -7177,22 +7187,22 @@ static int __Pyx_InitCachedConstants(void) { * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_word_idx, __pyx_n_s_word_subwords, __pyx_n_s_subword); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(0, 130, __pyx_L1_error) + __pyx_tuple__13 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(0, 130, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 46, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 130, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) __PYX_ERR(0, 130, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":251 + /* "gensim/models/fasttext_inner.pyx":250 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 251, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 251, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 251, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7610,25 +7620,25 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 130, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":251 + /* "gensim/models/fasttext_inner.pyx":250 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 251, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 251, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":291 + /* "gensim/models/fasttext_inner.pyx":290 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { @@ -7641,22 +7651,22 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) } } if (__pyx_t_3) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 290, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 291, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":292 + /* "gensim/models/fasttext_inner.pyx":291 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 292, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 291, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< @@ -7979,8 +7989,26 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg } #endif +/* GetModuleGlobalName */ +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS + result = PyDict_GetItem(__pyx_d, name); + if (likely(result)) { + Py_INCREF(result); + } else { +#else + result = PyObject_GetItem(__pyx_d, name); + if (!result) { + PyErr_Clear(); +#endif + result = __Pyx_GetBuiltinName(name); + } + return result; +} + /* PyFunctionFastCall */ -#if CYTHON_FAST_PYCALL + #if CYTHON_FAST_PYCALL #include "frameobject.h" static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, PyObject *globals) { @@ -8100,7 +8128,7 @@ static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, #endif /* PyCFunctionFastCall */ -#if CYTHON_FAST_PYCCALL + #if CYTHON_FAST_PYCCALL static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) { PyCFunctionObject *func = (PyCFunctionObject*)func_obj; PyCFunction meth = PyCFunction_GET_FUNCTION(func); @@ -8122,95 +8150,8 @@ static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, P } #endif -/* GetItemInt */ -static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { - PyObject *r; - if (!j) return NULL; - r = PyObject_GetItem(o, j); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyList_GET_SIZE(o); - } - if ((!boundscheck) || likely((0 <= wrapped_i) & (wrapped_i < PyList_GET_SIZE(o)))) { - PyObject *r = PyList_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyTuple_GET_SIZE(o); - } - if ((!boundscheck) || likely((0 <= wrapped_i) & (wrapped_i < PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); - if ((!boundscheck) || (likely((n >= 0) & (n < PyList_GET_SIZE(o))))) { - PyObject *r = PyList_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } - else if (PyTuple_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); - if ((!boundscheck) || likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } else { - PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence; - if (likely(m && m->sq_item)) { - if (wraparound && unlikely(i < 0) && likely(m->sq_length)) { - Py_ssize_t l = m->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return NULL; - PyErr_Clear(); - } - } - return m->sq_item(o, i); - } - } -#else - if (is_list || PySequence_Check(o)) { - return PySequence_GetItem(o, i); - } -#endif - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -} - /* RaiseException */ -#if PY_MAJOR_VERSION < 3 + #if PY_MAJOR_VERSION < 3 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, CYTHON_UNUSED PyObject *cause) { __Pyx_PyThreadState_declare @@ -8369,25 +8310,25 @@ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject #endif /* RaiseTooManyValuesToUnpack */ -static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { PyErr_Format(PyExc_ValueError, "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); } /* RaiseNeedMoreValuesToUnpack */ -static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { PyErr_Format(PyExc_ValueError, "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", index, (index == 1) ? "" : "s"); } /* RaiseNoneIterError */ -static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); } /* SaveResetException */ -#if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { #if PY_VERSION_HEX >= 0x030700A2 *type = tstate->exc_state.exc_type; @@ -8426,7 +8367,7 @@ static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject #endif /* PyErrExceptionMatches */ -#if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { Py_ssize_t i, n; n = PyTuple_GET_SIZE(tuple); @@ -8451,7 +8392,7 @@ static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tsta #endif /* GetException */ -#if CYTHON_FAST_THREAD_STATE + #if CYTHON_FAST_THREAD_STATE static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { #else static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) { @@ -8521,7 +8462,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* Import */ - static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { PyObject *empty_list = 0; PyObject *module = 0; PyObject *global_dict = 0; @@ -8586,7 +8527,7 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) } /* ImportFrom */ - static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { + static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { PyErr_Format(PyExc_ImportError, @@ -8599,24 +8540,6 @@ static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) return value; } -/* GetModuleGlobalName */ - static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { - PyObject *result; -#if !CYTHON_AVOID_BORROWED_REFS - result = PyDict_GetItem(__pyx_d, name); - if (likely(result)) { - Py_INCREF(result); - } else { -#else - result = PyObject_GetItem(__pyx_d, name); - if (!result) { - PyErr_Clear(); -#endif - result = __Pyx_GetBuiltinName(name); - } - return result; -} - /* PyObjectCallMethO */ #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 9dc6f34ec2..8442215f20 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -44,7 +44,7 @@ cdef REAL_t ONEF = 1.0 cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, - const np.uint32_t word_index, np.uint32_t *subwords_index, const np.uint32_t subwords_len, + const np.uint32_t word_index, const np.uint32_t *subwords_index, const np.uint32_t subwords_len, const REAL_t alpha, REAL_t *work, REAL_t *l1, unsigned long long next_random, REAL_t *word_locks_vocab, REAL_t *word_locks_ngrams) nogil: @@ -93,7 +93,7 @@ cdef unsigned long long fast_sentence_sg_neg( cdef void fast_sentence_sg_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, - np.uint32_t *subwords_index, const np.uint32_t subwords_len, + const np.uint32_t *subwords_index, const np.uint32_t subwords_len, const REAL_t alpha, REAL_t *work, REAL_t *l1, REAL_t *word_locks_vocab, REAL_t *word_locks_ngrams) nogil: @@ -166,8 +166,9 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): cdef unsigned long long next_random # For passing subwords information as C objects for nogil - cdef np.uint32_t subwords_idx_len[MAX_SENTENCE_LEN] - cdef np.uint32_t subwords_idx[MAX_SENTENCE_LEN][MAX_SUBWORDS] + cdef int subwords_idx_len[MAX_SENTENCE_LEN] + cdef np.uint32_t *subwords_idx[MAX_SENTENCE_LEN] + if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -196,6 +197,12 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): if sample and word.sample_int < random_int32(&next_random): continue indexes[effective_words] = word.index + + subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + subwords_idx_len[effective_words] = (len(subwords) + 1) + subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + if hs: codelens[effective_words] = len(word.code) codes[effective_words] = np.PyArray_DATA(word.code) @@ -218,14 +225,6 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): for i, item in enumerate(model.random.randint(0, window, effective_words)): reduced_windows[i] = item - for i in range(effective_words): - word_idx = indexes[i] - word_subwords = model.wv.ngrams_word[model.wv.index2word[word_idx]] - subwords_idx_len[i] = len(word_subwords) + 1 - subwords_idx[i][0] = word_idx - for j, subword in enumerate(word_subwords): - subwords_idx[i][j+1] = model.wv.ngrams[subword] - with nogil: for sent_idx in range(effective_sentences): idx_start = sentence_idx[sent_idx] From 73e917676780be1aceb0c37a2bbeaf69b9d81e97 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Tue, 28 Nov 2017 04:24:33 +0530 Subject: [PATCH 07/22] fixes accuracy issues due to reference counts of word_subwords becoming 0 --- gensim/models/fasttext_inner.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 8442215f20..fb9cb08c1f 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -168,7 +168,9 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): # For passing subwords information as C objects for nogil cdef int subwords_idx_len[MAX_SENTENCE_LEN] cdef np.uint32_t *subwords_idx[MAX_SENTENCE_LEN] - + # dummy dictionary to ensure that the memory locations that subwords_idx point to + # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager + subword_arrays = {} if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -202,6 +204,8 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): word_subwords = np.array([word.index] + subwords, dtype=np.uint32) subwords_idx_len[effective_words] = (len(subwords) + 1) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + # ensures reference count of word_subwords doesn't reach 0 + subword_arrays[effective_words] = word_subwords if hs: codelens[effective_words] = len(word.code) From 2d0e1a6c807d333ad575c516a2deea5de12133dc Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 29 Nov 2017 04:22:25 +0530 Subject: [PATCH 08/22] cythonizes fasttext cbow architecture --- gensim/models/fasttext.py | 4 +- gensim/models/fasttext_inner.c | 4045 ++++++++++++++++++++++++------ gensim/models/fasttext_inner.pyx | 251 ++ 3 files changed, 3502 insertions(+), 798 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index a72c93322f..f089bc65de 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -14,12 +14,10 @@ logger = logging.getLogger(__name__) try: - # TODO : log FAST_VERSION - from gensim.models.fasttext_inner import train_batch_sg + from gensim.models.fasttext_inner import train_batch_sg, train_batch_cbow from gensim.models.fasttext_inner import FAST_VERSION, MAX_WORDS_IN_BATCH except ImportError: - # why falling back - log # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 MAX_WORDS_IN_BATCH = 10000 diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index c80ea35789..0f33a8ecdc 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -756,11 +756,6 @@ static const char *__pyx_f[] = { "__init__.pxd", "type.pxd", }; -/* ForceInitThreads.proto */ -#ifndef __PYX_FORCE_INIT_THREADS - #define __PYX_FORCE_INIT_THREADS 0 -#endif - /* NoFastGil.proto */ #define __Pyx_PyGILState_Ensure PyGILState_Ensure #define __Pyx_PyGILState_Release PyGILState_Release @@ -768,6 +763,11 @@ static const char *__pyx_f[] = { #define __Pyx_FastGIL_Forget() #define __Pyx_FastGilFuncInit() +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 * # in Cython to enable them only on the right systems. @@ -1184,47 +1184,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject /* GetBuiltinName.proto */ static PyObject *__Pyx_GetBuiltinName(PyObject *name); -/* PyThreadStateGet.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; -#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; -#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type -#else -#define __Pyx_PyThreadState_declare -#define __Pyx_PyThreadState_assign -#define __Pyx_PyErr_Occurred() PyErr_Occurred() -#endif - -/* PyErrFetchRestore.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) -#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) -#else -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#endif -#else -#define __Pyx_PyErr_Clear() PyErr_Clear() -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) -#endif - -/* WriteUnraisableException.proto */ -static void __Pyx_WriteUnraisable(const char *name, int clineno, - int lineno, const char *filename, - int full_traceback, int nogil); - /* RaiseArgTupleInvalid.proto */ static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); @@ -1291,6 +1250,42 @@ static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObje #define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL) #endif +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + /* RaiseException.proto */ static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); @@ -1633,6 +1628,8 @@ static int __pyx_v_6gensim_6models_14fasttext_inner_ONE; static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_6gensim_6models_14fasttext_inner_ONEF; static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const **, int const *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const **, int const *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ #define __Pyx_MODULE_NAME "gensim.models.fasttext_inner" extern int __pyx_module_is_main_gensim__models__fasttext_inner; int __pyx_module_is_main_gensim__models__fasttext_inner = 0; @@ -1652,13 +1649,14 @@ static const char __pyx_k_hs[] = "hs"; static const char __pyx_k_l1[] = "_l1"; static const char __pyx_k_np[] = "np"; static const char __pyx_k_wv[] = "wv"; -static const char __pyx_k__12[] = "*"; +static const char __pyx_k__14[] = "*"; static const char __pyx_k_REAL[] = "REAL"; static const char __pyx_k_code[] = "code"; static const char __pyx_k_init[] = "init"; static const char __pyx_k_item[] = "item"; static const char __pyx_k_l1_2[] = "l1"; static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_neu1[] = "_neu1"; static const char __pyx_k_sent[] = "sent"; static const char __pyx_k_size[] = "size"; static const char __pyx_k_syn1[] = "syn1"; @@ -1681,6 +1679,7 @@ static const char __pyx_k_token[] = "token"; static const char __pyx_k_vocab[] = "vocab"; static const char __pyx_k_gensim[] = "gensim"; static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_neu1_2[] = "neu1"; static const char __pyx_k_ngrams[] = "ngrams"; static const char __pyx_k_points[] = "points"; static const char __pyx_k_random[] = "random"; @@ -1702,6 +1701,7 @@ static const char __pyx_k_negative[] = "negative"; static const char __pyx_k_sent_idx[] = "sent_idx"; static const char __pyx_k_subwords[] = "subwords"; static const char __pyx_k_word2vec[] = "word2vec"; +static const char __pyx_k_cbow_mean[] = "cbow_mean"; static const char __pyx_k_cum_table[] = "cum_table"; static const char __pyx_k_enumerate[] = "enumerate"; static const char __pyx_k_idx_start[] = "idx_start"; @@ -1722,11 +1722,13 @@ static const char __pyx_k_sentence_idx[] = "sentence_idx"; static const char __pyx_k_subwords_idx[] = "subwords_idx"; static const char __pyx_k_cum_table_len[] = "cum_table_len"; static const char __pyx_k_word_subwords[] = "word_subwords"; +static const char __pyx_k_subword_arrays[] = "subword_arrays"; static const char __pyx_k_train_batch_sg[] = "train_batch_sg"; static const char __pyx_k_effective_words[] = "effective_words"; static const char __pyx_k_reduced_windows[] = "reduced_windows"; static const char __pyx_k_subwords_idx_len[] = "subwords_idx_len"; static const char __pyx_k_syn0_vocab_lockf[] = "syn0_vocab_lockf"; +static const char __pyx_k_train_batch_cbow[] = "train_batch_cbow"; static const char __pyx_k_word_locks_vocab[] = "word_locks_vocab"; static const char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static const char __pyx_k_syn0_ngrams_lockf[] = "syn0_ngrams_lockf"; @@ -1753,10 +1755,11 @@ static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; static PyObject *__pyx_n_s_REAL; static PyObject *__pyx_n_s_RuntimeError; static PyObject *__pyx_n_s_ValueError; -static PyObject *__pyx_n_s__12; +static PyObject *__pyx_n_s__14; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; static PyObject *__pyx_n_s_array; +static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_cline_in_traceback; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; @@ -1795,6 +1798,8 @@ static PyObject *__pyx_n_s_model; static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; static PyObject *__pyx_n_s_negative; +static PyObject *__pyx_n_s_neu1; +static PyObject *__pyx_n_s_neu1_2; static PyObject *__pyx_n_s_next_random; static PyObject *__pyx_n_s_ngrams; static PyObject *__pyx_n_s_ngrams_word; @@ -1817,6 +1822,7 @@ static PyObject *__pyx_n_s_sent_idx; static PyObject *__pyx_n_s_sentence_idx; static PyObject *__pyx_n_s_sentences; static PyObject *__pyx_n_s_size; +static PyObject *__pyx_n_s_subword_arrays; static PyObject *__pyx_n_s_subword_i; static PyObject *__pyx_n_s_subwords; static PyObject *__pyx_n_s_subwords_idx; @@ -1829,6 +1835,7 @@ static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_test; static PyObject *__pyx_n_s_token; +static PyObject *__pyx_n_s_train_batch_cbow; static PyObject *__pyx_n_s_train_batch_sg; static PyObject *__pyx_n_s_uint32; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; @@ -1846,7 +1853,8 @@ static PyObject *__pyx_n_s_wv; static PyObject *__pyx_n_s_x; static PyObject *__pyx_n_s_y; static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__l1); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ static PyObject *__pyx_int_0; @@ -1865,13 +1873,17 @@ static PyObject *__pyx_tuple__8; static PyObject *__pyx_tuple__9; static PyObject *__pyx_tuple__10; static PyObject *__pyx_tuple__11; +static PyObject *__pyx_tuple__12; static PyObject *__pyx_tuple__13; static PyObject *__pyx_tuple__15; -static PyObject *__pyx_codeobj__14; +static PyObject *__pyx_tuple__17; +static PyObject *__pyx_tuple__19; static PyObject *__pyx_codeobj__16; +static PyObject *__pyx_codeobj__18; +static PyObject *__pyx_codeobj__20; -/* "gensim/models/fasttext_inner.pyx":44 - * cdef REAL_t ONEF = 1.0 +/* "gensim/models/fasttext_inner.pyx":48 + * * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -1896,10 +1908,9 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente int __pyx_t_2; long __pyx_t_3; int __pyx_t_4; - unsigned PY_LONG_LONG __pyx_t_5; - int __pyx_t_6; + int __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":52 + /* "gensim/models/fasttext_inner.pyx":56 * * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -1908,7 +1919,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":53 + /* "gensim/models/fasttext_inner.pyx":57 * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1917,7 +1928,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":54 + /* "gensim/models/fasttext_inner.pyx":58 * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -1926,7 +1937,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":59 + /* "gensim/models/fasttext_inner.pyx":63 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1935,7 +1946,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":60 + /* "gensim/models/fasttext_inner.pyx":64 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1944,7 +1955,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":62 + /* "gensim/models/fasttext_inner.pyx":66 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1953,7 +1964,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":63 + /* "gensim/models/fasttext_inner.pyx":67 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -1964,7 +1975,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":64 + /* "gensim/models/fasttext_inner.pyx":68 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< @@ -1973,7 +1984,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":65 + /* "gensim/models/fasttext_inner.pyx":69 * for d in range(1, subwords_len): * subword_row = subwords_index[d] * size * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1983,26 +1994,16 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":66 + /* "gensim/models/fasttext_inner.pyx":70 * subword_row = subwords_index[d] * size * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< * sscal(&size, &norm_factor, l1 , &ONE) * */ - if (unlikely(__pyx_v_subwords_len == 0)) { - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - PyErr_SetString(PyExc_ZeroDivisionError, "float division"); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __PYX_ERR(0, 66, __pyx_L1_error) - } __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":67 + /* "gensim/models/fasttext_inner.pyx":71 * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -2011,7 +2012,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":69 + /* "gensim/models/fasttext_inner.pyx":73 * sscal(&size, &norm_factor, l1 , &ONE) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2022,7 +2023,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":70 + /* "gensim/models/fasttext_inner.pyx":74 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2032,7 +2033,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_d == 0) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":71 + /* "gensim/models/fasttext_inner.pyx":75 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2041,7 +2042,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":72 + /* "gensim/models/fasttext_inner.pyx":76 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2050,7 +2051,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":70 + /* "gensim/models/fasttext_inner.pyx":74 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2060,7 +2061,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L7; } - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":78 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2068,21 +2069,9 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * if target_index == word_index: */ /*else*/ { - __pyx_t_5 = (__pyx_v_next_random >> 16); - __pyx_t_1 = (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)]); - if (unlikely(__pyx_t_1 == 0)) { - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __PYX_ERR(0, 74, __pyx_L1_error) - } - __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, (__pyx_t_5 % __pyx_t_1), 0, __pyx_v_cum_table_len); + __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":75 + /* "gensim/models/fasttext_inner.pyx":79 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2091,7 +2080,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":76 + /* "gensim/models/fasttext_inner.pyx":80 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2101,7 +2090,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":77 + /* "gensim/models/fasttext_inner.pyx":81 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2110,7 +2099,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":76 + /* "gensim/models/fasttext_inner.pyx":80 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2119,7 +2108,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":78 + /* "gensim/models/fasttext_inner.pyx":82 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2130,7 +2119,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L7:; - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":84 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2139,7 +2128,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":81 + /* "gensim/models/fasttext_inner.pyx":85 * * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2148,25 +2137,25 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":82 + /* "gensim/models/fasttext_inner.pyx":86 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] */ - __pyx_t_6 = ((__pyx_v_f_dot <= -6.0) != 0); - if (!__pyx_t_6) { + __pyx_t_5 = ((__pyx_v_f_dot <= -6.0) != 0); + if (!__pyx_t_5) { } else { - __pyx_t_4 = __pyx_t_6; + __pyx_t_4 = __pyx_t_5; goto __pyx_L10_bool_binop_done; } - __pyx_t_6 = ((__pyx_v_f_dot >= 6.0) != 0); - __pyx_t_4 = __pyx_t_6; + __pyx_t_5 = ((__pyx_v_f_dot >= 6.0) != 0); + __pyx_t_4 = __pyx_t_5; __pyx_L10_bool_binop_done:; if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":83 + /* "gensim/models/fasttext_inner.pyx":87 * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2175,7 +2164,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":82 + /* "gensim/models/fasttext_inner.pyx":86 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2184,7 +2173,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":84 + /* "gensim/models/fasttext_inner.pyx":88 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2193,7 +2182,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":85 + /* "gensim/models/fasttext_inner.pyx":89 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2202,7 +2191,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":86 + /* "gensim/models/fasttext_inner.pyx":90 * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2211,7 +2200,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":87 + /* "gensim/models/fasttext_inner.pyx":91 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2222,7 +2211,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":88 + /* "gensim/models/fasttext_inner.pyx":92 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2231,7 +2220,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":89 + /* "gensim/models/fasttext_inner.pyx":93 * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2242,7 +2231,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":90 + /* "gensim/models/fasttext_inner.pyx":94 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2252,18 +2241,18 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":91 + /* "gensim/models/fasttext_inner.pyx":95 * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) * return next_random # <<<<<<<<<<<<<< * - * cdef void fast_sentence_sg_hs( + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":44 - * cdef REAL_t ONEF = 1.0 + /* "gensim/models/fasttext_inner.pyx":48 + * * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -2271,15 +2260,12 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ /* function exit code */ - __pyx_L1_error:; - __Pyx_WriteUnraisable("gensim.models.fasttext_inner.fast_sentence_sg_neg", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1); - __pyx_r = 0; __pyx_L0:; return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":93 - * return next_random +/* "gensim/models/fasttext_inner.pyx":98 + * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, @@ -2304,7 +2290,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t int __pyx_t_5; int __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":101 + /* "gensim/models/fasttext_inner.pyx":106 * * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -2313,7 +2299,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":102 + /* "gensim/models/fasttext_inner.pyx":107 * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< @@ -2322,7 +2308,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":105 + /* "gensim/models/fasttext_inner.pyx":110 * cdef REAL_t f, g, f_dot, lprob * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2331,7 +2317,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":106 + /* "gensim/models/fasttext_inner.pyx":111 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2340,7 +2326,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":108 + /* "gensim/models/fasttext_inner.pyx":113 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2349,7 +2335,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":109 + /* "gensim/models/fasttext_inner.pyx":114 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2360,7 +2346,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":110 + /* "gensim/models/fasttext_inner.pyx":115 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< @@ -2369,7 +2355,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":111 + /* "gensim/models/fasttext_inner.pyx":116 * for d in range(1, subwords_len): * subword_row = subwords_index[d] * size * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2379,26 +2365,16 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":112 + /* "gensim/models/fasttext_inner.pyx":117 * subword_row = subwords_index[d] * size * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< * sscal(&size, &norm_factor, l1 , &ONE) * */ - if (unlikely(__pyx_v_subwords_len == 0)) { - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - PyErr_SetString(PyExc_ZeroDivisionError, "float division"); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __PYX_ERR(0, 112, __pyx_L1_error) - } __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":113 + /* "gensim/models/fasttext_inner.pyx":118 * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -2407,7 +2383,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":115 + /* "gensim/models/fasttext_inner.pyx":120 * sscal(&size, &norm_factor, l1 , &ONE) * * for b in range(codelen): # <<<<<<<<<<<<<< @@ -2418,7 +2394,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_b = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":116 + /* "gensim/models/fasttext_inner.pyx":121 * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2427,7 +2403,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":117 + /* "gensim/models/fasttext_inner.pyx":122 * for b in range(codelen): * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2436,7 +2412,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":118 + /* "gensim/models/fasttext_inner.pyx":123 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2454,7 +2430,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L8_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":119 + /* "gensim/models/fasttext_inner.pyx":124 * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2463,7 +2439,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":118 + /* "gensim/models/fasttext_inner.pyx":123 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2472,7 +2448,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ } - /* "gensim/models/fasttext_inner.pyx":120 + /* "gensim/models/fasttext_inner.pyx":125 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2481,7 +2457,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":121 + /* "gensim/models/fasttext_inner.pyx":126 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2490,7 +2466,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":123 + /* "gensim/models/fasttext_inner.pyx":128 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2499,7 +2475,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":124 + /* "gensim/models/fasttext_inner.pyx":129 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2510,7 +2486,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":126 + /* "gensim/models/fasttext_inner.pyx":131 * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2519,7 +2495,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":127 + /* "gensim/models/fasttext_inner.pyx":132 * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2530,18 +2506,18 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":128 + /* "gensim/models/fasttext_inner.pyx":133 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< * - * def train_batch_sg(model, sentences, alpha, _work, _l1): + * */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":93 - * return next_random + /* "gensim/models/fasttext_inner.pyx":98 + * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, @@ -2549,125 +2525,2615 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_WriteUnraisable("gensim.models.fasttext_inner.fast_sentence_sg_hs", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1); - __pyx_L0:; } -/* "gensim/models/fasttext_inner.pyx":130 - * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) +/* "gensim/models/fasttext_inner.pyx":136 * - * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * + * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, */ -/* Python wrapper */ -static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg = {"train_batch_sg", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_sentences = 0; - PyObject *__pyx_v_alpha = 0; - PyObject *__pyx_v__work = 0; - PyObject *__pyx_v__l1 = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("train_batch_sg (wrapper)", 0); - { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentences,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_l1,0}; - PyObject* values[5] = {0,0,0,0,0}; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args; - const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); - switch (pos_args) { - case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - CYTHON_FALLTHROUGH; - case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - CYTHON_FALLTHROUGH; - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = PyDict_Size(__pyx_kwds); - switch (pos_args) { - case 0: - if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 130, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 130, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 3: - if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 130, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 4: - if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 130, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 130, __pyx_L3_error) - } - } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - } - __pyx_v_model = values[0]; - __pyx_v_sentences = values[1]; - __pyx_v_alpha = values[2]; - __pyx_v__work = values[3]; - __pyx_v__l1 = values[4]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 130, __pyx_L3_error) - __pyx_L3_error:; - __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(__pyx_self, __pyx_v_model, __pyx_v_sentences, __pyx_v_alpha, __pyx_v__work, __pyx_v__l1); +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, CYTHON_UNUSED int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab) { + PY_LONG_LONG __pyx_v_row2; + unsigned PY_LONG_LONG __pyx_v_modulo; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; + __pyx_t_5numpy_uint32_t __pyx_v_target_index; + __pyx_t_5numpy_uint32_t __pyx_v_word_index; + int __pyx_v_d; + int __pyx_v_m; + unsigned PY_LONG_LONG __pyx_r; + int __pyx_t_1; + int __pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + long __pyx_t_6; + int __pyx_t_7; - /* function exit code */ + /* "gensim/models/fasttext_inner.pyx":145 + * cdef long long a + * cdef long long row2 + * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< + * cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot + * cdef np.uint32_t target_index, word_index + */ + __pyx_v_modulo = 281474976710655ULL; + + /* "gensim/models/fasttext_inner.pyx":146 + * cdef long long row2 + * cdef unsigned long long modulo = 281474976710655ULL + * cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot # <<<<<<<<<<<<<< + * cdef np.uint32_t target_index, word_index + * cdef int d, m + */ + __pyx_v_inv_count = 1.0; + + /* "gensim/models/fasttext_inner.pyx":150 + * cdef int d, m + * + * word_index = indexes[i] # <<<<<<<<<<<<<< + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + */ + __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); + + /* "gensim/models/fasttext_inner.pyx":152 + * word_index = indexes[i] + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * count = 0.0 + * for m in range(j, k): + */ + memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":153 + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 # <<<<<<<<<<<<<< + * for m in range(j, k): + * if m == i: + */ + __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + + /* "gensim/models/fasttext_inner.pyx":154 + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 + * for m in range(j, k): # <<<<<<<<<<<<<< + * if m == i: + * continue + */ + __pyx_t_1 = __pyx_v_k; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":155 + * count = 0.0 + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":156 + * for m in range(j, k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * else: + * count += ONEF + */ + goto __pyx_L3_continue; + + /* "gensim/models/fasttext_inner.pyx":155 + * count = 0.0 + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + } + + /* "gensim/models/fasttext_inner.pyx":158 + * continue + * else: + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): + */ + /*else*/ { + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + + /* "gensim/models/fasttext_inner.pyx":159 + * else: + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_idx_len[m]): + * count += ONEF + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":160 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + */ + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; + + /* "gensim/models/fasttext_inner.pyx":161 + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * + */ + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + + /* "gensim/models/fasttext_inner.pyx":162 + * for d in range(1, subwords_idx_len[m]): + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * + * if count > (0.5): + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + } + __pyx_L3_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":164 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * + * if count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * if cbow_mean: + */ + __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":165 + * + * if count > (0.5): + * inv_count = ONEF/count # <<<<<<<<<<<<<< + * if cbow_mean: + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + */ + __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); + + /* "gensim/models/fasttext_inner.pyx":164 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * + * if count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * if cbow_mean: + */ + } + + /* "gensim/models/fasttext_inner.pyx":166 + * if count > (0.5): + * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":167 + * inv_count = ONEF/count + * if cbow_mean: + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * + * memset(work, 0, size * cython.sizeof(REAL_t)) + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":166 + * if count > (0.5): + * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":169 + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * + * for d in range(negative+1): + */ + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":171 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * + * for d in range(negative+1): # <<<<<<<<<<<<<< + * if d == 0: + * target_index = word_index + */ + __pyx_t_6 = (__pyx_v_negative + 1); + for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { + __pyx_v_d = __pyx_t_1; + + /* "gensim/models/fasttext_inner.pyx":172 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF + */ + __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":173 + * for d in range(negative+1): + * if d == 0: + * target_index = word_index # <<<<<<<<<<<<<< + * label = ONEF + * else: + */ + __pyx_v_target_index = __pyx_v_word_index; + + /* "gensim/models/fasttext_inner.pyx":174 + * if d == 0: + * target_index = word_index + * label = ONEF # <<<<<<<<<<<<<< + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + */ + __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; + + /* "gensim/models/fasttext_inner.pyx":172 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF + */ + goto __pyx_L12; + } + + /* "gensim/models/fasttext_inner.pyx":176 + * label = ONEF + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + */ + /*else*/ { + __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); + + /* "gensim/models/fasttext_inner.pyx":177 + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == word_index: + * continue + */ + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); + + /* "gensim/models/fasttext_inner.pyx":178 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":179 + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + * continue # <<<<<<<<<<<<<< + * label = 0.0 + * + */ + goto __pyx_L10_continue; + + /* "gensim/models/fasttext_inner.pyx":178 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + } + + /* "gensim/models/fasttext_inner.pyx":180 + * if target_index == word_index: + * continue + * label = 0.0 # <<<<<<<<<<<<<< + * + * row2 = target_index * size + */ + __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + } + __pyx_L12:; + + /* "gensim/models/fasttext_inner.pyx":182 + * label = 0.0 + * + * row2 = target_index * size # <<<<<<<<<<<<<< + * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + */ + __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":183 + * + * row2 = target_index * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + */ + __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":184 + * row2 = target_index * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_7 = ((__pyx_v_f_dot <= -6.0) != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_3 = __pyx_t_7; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_7 = ((__pyx_v_f_dot >= 6.0) != 0); + __pyx_t_3 = __pyx_t_7; + __pyx_L15_bool_binop_done:; + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":185 + * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha + */ + goto __pyx_L10_continue; + + /* "gensim/models/fasttext_inner.pyx":184 + * row2 = target_index * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + } + + /* "gensim/models/fasttext_inner.pyx":186 + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (label - f) * alpha + * + */ + __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); + + /* "gensim/models/fasttext_inner.pyx":187 + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha # <<<<<<<<<<<<<< + * + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + */ + __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); + + /* "gensim/models/fasttext_inner.pyx":189 + * g = (label - f) * alpha + * + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":190 + * + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * + * if not cbow_mean: # divide error over summed window vectors + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_L10_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":192 + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":193 + * + * if not cbow_mean: # divide error over summed window vectors + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * + * for m in range(j,k): + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":192 + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":195 + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + * for m in range(j,k): # <<<<<<<<<<<<<< + * if m == i: + * continue + */ + __pyx_t_1 = __pyx_v_k; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":196 + * + * for m in range(j,k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":197 + * for m in range(j,k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + */ + goto __pyx_L18_continue; + + /* "gensim/models/fasttext_inner.pyx":196 + * + * for m in range(j,k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + } + + /* "gensim/models/fasttext_inner.pyx":199 + * continue + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + */ + /*else*/ { + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":200 + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * + */ + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; + + /* "gensim/models/fasttext_inner.pyx":201 + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(1, subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< + * + * return next_random + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + } + __pyx_L18_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":203 + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * + * return next_random # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = __pyx_v_next_random; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":136 + * + * + * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "gensim/models/fasttext_inner.pyx":206 + * + * + * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + */ + +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab) { + PY_LONG_LONG __pyx_v_b; + PY_LONG_LONG __pyx_v_row2; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; + int __pyx_v_m; + long __pyx_v_d; + int __pyx_t_1; + int __pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + long __pyx_t_5; + PY_LONG_LONG __pyx_t_6; + int __pyx_t_7; + + /* "gensim/models/fasttext_inner.pyx":215 + * cdef long long a, b + * cdef long long row2, sgn + * cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob # <<<<<<<<<<<<<< + * cdef int m + * + */ + __pyx_v_inv_count = 1.0; + + /* "gensim/models/fasttext_inner.pyx":218 + * cdef int m + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * count = 0.0 + * for m in range(j, k): + */ + memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":219 + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 # <<<<<<<<<<<<<< + * for m in range(j, k): + * if m == i: + */ + __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + + /* "gensim/models/fasttext_inner.pyx":220 + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 + * for m in range(j, k): # <<<<<<<<<<<<<< + * if m == i: + * continue + */ + __pyx_t_1 = __pyx_v_k; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":221 + * count = 0.0 + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":222 + * for m in range(j, k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * else: + * count += ONEF + */ + goto __pyx_L3_continue; + + /* "gensim/models/fasttext_inner.pyx":221 + * count = 0.0 + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + } + + /* "gensim/models/fasttext_inner.pyx":224 + * continue + * else: + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): + */ + /*else*/ { + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + + /* "gensim/models/fasttext_inner.pyx":225 + * else: + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_idx_len[m]): + * count += ONEF + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":226 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + */ + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; + + /* "gensim/models/fasttext_inner.pyx":227 + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(1, subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * if count > (0.5): + */ + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + + /* "gensim/models/fasttext_inner.pyx":228 + * for d in range(1, subwords_idx_len[m]): + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * if count > (0.5): + * inv_count = ONEF/count + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + } + __pyx_L3_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":229 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * if count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * if cbow_mean: + */ + __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":230 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * if count > (0.5): + * inv_count = ONEF/count # <<<<<<<<<<<<<< + * if cbow_mean: + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + */ + __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); + + /* "gensim/models/fasttext_inner.pyx":229 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * if count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * if cbow_mean: + */ + } + + /* "gensim/models/fasttext_inner.pyx":231 + * if count > (0.5): + * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":232 + * inv_count = ONEF/count + * if cbow_mean: + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * + * memset(work, 0, size * cython.sizeof(REAL_t)) + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":231 + * if count > (0.5): + * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":234 + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * for b in range(codelens[i]): + * row2 = word_point[b] * size + */ + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); + + /* "gensim/models/fasttext_inner.pyx":235 + * + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): # <<<<<<<<<<<<<< + * row2 = word_point[b] * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + */ + __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); + for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_1; __pyx_t_6+=1) { + __pyx_v_b = __pyx_t_6; + + /* "gensim/models/fasttext_inner.pyx":236 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): + * row2 = word_point[b] * size # <<<<<<<<<<<<<< + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + */ + __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); + + /* "gensim/models/fasttext_inner.pyx":237 + * for b in range(codelens[i]): + * row2 = word_point[b] * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + */ + __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":238 + * row2 = word_point[b] * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_7 = ((__pyx_v_f_dot <= -6.0) != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_3 = __pyx_t_7; + goto __pyx_L13_bool_binop_done; + } + __pyx_t_7 = ((__pyx_v_f_dot >= 6.0) != 0); + __pyx_t_3 = __pyx_t_7; + __pyx_L13_bool_binop_done:; + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":239 + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha + */ + goto __pyx_L10_continue; + + /* "gensim/models/fasttext_inner.pyx":238 + * row2 = word_point[b] * size + * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + } + + /* "gensim/models/fasttext_inner.pyx":240 + * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (1 - word_code[b] - f) * alpha + * + */ + __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); + + /* "gensim/models/fasttext_inner.pyx":241 + * continue + * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + */ + __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); + + /* "gensim/models/fasttext_inner.pyx":243 + * g = (1 - word_code[b] - f) * alpha + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":244 + * + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * + * if not cbow_mean: # divide error over summed window vectors + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_L10_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":246 + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":247 + * + * if not cbow_mean: # divide error over summed window vectors + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * + * for m in range(j,k): + */ + __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":246 + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":249 + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + * for m in range(j,k): # <<<<<<<<<<<<<< + * if m == i: + * continue + */ + __pyx_t_1 = __pyx_v_k; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":250 + * + * for m in range(j,k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":251 + * for m in range(j,k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + */ + goto __pyx_L16_continue; + + /* "gensim/models/fasttext_inner.pyx":250 + * + * for m in range(j,k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ + } + + /* "gensim/models/fasttext_inner.pyx":253 + * continue + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * for d in range(1, subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + */ + /*else*/ { + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + + /* "gensim/models/fasttext_inner.pyx":254 + * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * + */ + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; + + /* "gensim/models/fasttext_inner.pyx":255 + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(1, subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< + * + * + */ + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + } + } + __pyx_L16_continue:; + } + + /* "gensim/models/fasttext_inner.pyx":206 + * + * + * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + */ + + /* function exit code */ +} + +/* "gensim/models/fasttext_inner.pyx":258 + * + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg = {"train_batch_sg", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_model = 0; + PyObject *__pyx_v_sentences = 0; + PyObject *__pyx_v_alpha = 0; + PyObject *__pyx_v__work = 0; + PyObject *__pyx_v__l1 = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("train_batch_sg (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentences,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_l1,0}; + PyObject* values[5] = {0,0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 258, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 258, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 258, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 258, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 258, __pyx_L3_error) + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + } + __pyx_v_model = values[0]; + __pyx_v_sentences = values[1]; + __pyx_v_alpha = values[2]; + __pyx_v__work = values[3]; + __pyx_v__l1 = values[4]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 258, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(__pyx_self, __pyx_v_model, __pyx_v_sentences, __pyx_v_alpha, __pyx_v__work, __pyx_v__l1); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__l1) { + int __pyx_v_hs; + int __pyx_v_negative; + int __pyx_v_sample; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; + int __pyx_v_size; + int __pyx_v_codelens[0x2710]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[0x2710]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[0x2710]; + int __pyx_v_sentence_idx[(0x2710 + 1)]; + int __pyx_v_window; + int __pyx_v_i; + int __pyx_v_j; + int __pyx_v_k; + int __pyx_v_effective_words; + int __pyx_v_effective_sentences; + int __pyx_v_sent_idx; + int __pyx_v_idx_start; + int __pyx_v_idx_end; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1; + __pyx_t_5numpy_uint32_t *__pyx_v_points[0x2710]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[0x2710]; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg; + __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; + unsigned PY_LONG_LONG __pyx_v_cum_table_len; + unsigned PY_LONG_LONG __pyx_v_next_random; + int __pyx_v_subwords_idx_len[0x2710]; + __pyx_t_5numpy_uint32_t *__pyx_v_subwords_idx[0x2710]; + PyObject *__pyx_v_subword_arrays = NULL; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_sent = NULL; + PyObject *__pyx_v_token = NULL; + PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_subwords = NULL; + PyObject *__pyx_v_word_subwords = NULL; + PyObject *__pyx_v_item = NULL; + PyObject *__pyx_v_subword_i = NULL; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; + int __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + Py_ssize_t __pyx_t_11; + PyObject *(*__pyx_t_12)(PyObject *); + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + __pyx_t_5numpy_uint32_t __pyx_t_15; + PyObject *__pyx_t_16 = NULL; + PyObject *__pyx_t_17 = NULL; + Py_ssize_t __pyx_t_18; + PyObject *(*__pyx_t_19)(PyObject *); + int __pyx_t_20; + int __pyx_t_21; + int __pyx_t_22; + int __pyx_t_23; + int __pyx_t_24; + __Pyx_RefNannySetupContext("train_batch_sg", 0); + + /* "gensim/models/fasttext_inner.pyx":259 + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): + * cdef int hs = model.hs # <<<<<<<<<<<<<< + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 259, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 259, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_hs = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":260 + * def train_batch_sg(model, sentences, alpha, _work, _l1): + * cdef int hs = model.hs + * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 260, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 260, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_negative = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":261 + * cdef int hs = model.hs + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 261, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 261, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":263 + * cdef int sample = (model.sample != 0) + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 263, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 263, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 263, __pyx_L1_error) + __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":264 + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 264, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 264, __pyx_L1_error) + __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":265 + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 265, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 265, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 265, __pyx_L1_error) + __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":266 + * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< + * + * cdef REAL_t *work + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 266, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 266, __pyx_L1_error) + __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":271 + * cdef REAL_t *l1 + * + * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< + * cdef int size = model.layer1_size + * + */ + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_v__alpha = __pyx_t_4; + + /* "gensim/models/fasttext_inner.pyx":272 + * + * cdef REAL_t _alpha = alpha + * cdef int size = model.layer1_size # <<<<<<<<<<<<<< + * + * cdef int codelens[MAX_SENTENCE_LEN] + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 272, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 272, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_size = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":278 + * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] + * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] + * cdef int window = model.window # <<<<<<<<<<<<<< + * + * cdef int i, j, k + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 278, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_window = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":281 + * + * cdef int i, j, k + * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< + * cdef int sent_idx, idx_start, idx_end + * + */ + __pyx_v_effective_words = 0; + __pyx_v_effective_sentences = 0; + + /* "gensim/models/fasttext_inner.pyx":301 + * # dummy dictionary to ensure that the memory locations that subwords_idx point to + * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager + * subword_arrays = {} # <<<<<<<<<<<<<< + * + * if hs: + */ + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":303 + * subword_arrays = {} + * + * if hs: # <<<<<<<<<<<<<< + * syn1 = (np.PyArray_DATA(model.syn1)) + * + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":304 + * + * if hs: + * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< + * + * if negative: + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 304, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":303 + * subword_arrays = {} + * + * if hs: # <<<<<<<<<<<<<< + * syn1 = (np.PyArray_DATA(model.syn1)) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":306 + * syn1 = (np.PyArray_DATA(model.syn1)) + * + * if negative: # <<<<<<<<<<<<<< + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + */ + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":307 + * + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":308 + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< + * cum_table_len = len(model.cum_table) + * if negative or sample: + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 308, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 308, __pyx_L1_error) + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":309 + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 309, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_cum_table_len = __pyx_t_6; + + /* "gensim/models/fasttext_inner.pyx":306 + * syn1 = (np.PyArray_DATA(model.syn1)) + * + * if negative: # <<<<<<<<<<<<<< + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * cum_table = (np.PyArray_DATA(model.cum_table)) + */ + } + + /* "gensim/models/fasttext_inner.pyx":310 + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_7 = (__pyx_v_negative != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L6_bool_binop_done; + } + __pyx_t_7 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_7; + __pyx_L6_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":311 + * cum_table_len = len(model.cum_table) + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 311, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_next_random = __pyx_t_9; + + /* "gensim/models/fasttext_inner.pyx":310 + * cum_table = (np.PyArray_DATA(model.cum_table)) + * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":314 + * + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< + * l1 = np.PyArray_DATA(_l1) + * + */ + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + + /* "gensim/models/fasttext_inner.pyx":315 + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) + * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< + * + * # prepare C structures so we can go "full C" and release the Python GIL + */ + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); + + /* "gensim/models/fasttext_inner.pyx":318 + * + * # prepare C structures so we can go "full C" and release the Python GIL + * vlookup = model.wv.vocab # <<<<<<<<<<<<<< + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + */ + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_vlookup = __pyx_t_3; + __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":319 + * # prepare C structures so we can go "full C" and release the Python GIL + * vlookup = model.wv.vocab + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< + * for sent in sentences: + * if not sent: + */ + (__pyx_v_sentence_idx[0]) = 0; + + /* "gensim/models/fasttext_inner.pyx":320 + * vlookup = model.wv.vocab + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: # <<<<<<<<<<<<<< + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + */ + if (likely(PyList_CheckExact(__pyx_v_sentences)) || PyTuple_CheckExact(__pyx_v_sentences)) { + __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 320, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_3))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + #else + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + #else + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + #endif + } + } else { + __pyx_t_8 = __pyx_t_10(__pyx_t_3); + if (unlikely(!__pyx_t_8)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 320, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_8); + } + __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); + __pyx_t_8 = 0; + + /* "gensim/models/fasttext_inner.pyx":321 + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + * if not sent: # <<<<<<<<<<<<<< + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + */ + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_7 = ((!__pyx_t_5) != 0); + if (__pyx_t_7) { + + /* "gensim/models/fasttext_inner.pyx":322 + * for sent in sentences: + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< + * for token in sent: + * word = vlookup[token] if token in vlookup else None + */ + goto __pyx_L8_continue; + + /* "gensim/models/fasttext_inner.pyx":321 + * sentence_idx[0] = 0 # indices of the first sentence always start at 0 + * for sent in sentences: + * if not sent: # <<<<<<<<<<<<<< + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + */ + } + + /* "gensim/models/fasttext_inner.pyx":323 + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + if (likely(PyList_CheckExact(__pyx_v_sent)) || PyTuple_CheckExact(__pyx_v_sent)) { + __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; + __pyx_t_12 = NULL; + } else { + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 323, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 323, __pyx_L1_error) + } + for (;;) { + if (likely(!__pyx_t_12)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 323, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } else { + if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) + #else + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 323, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + #endif + } + } else { + __pyx_t_1 = __pyx_t_12(__pyx_t_8); + if (unlikely(!__pyx_t_1)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 323, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_1); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":324 + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: + * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + */ + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 324, __pyx_L1_error) + if ((__pyx_t_7 != 0)) { + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 324, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __pyx_t_13; + __pyx_t_13 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_1 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); + __pyx_t_1 = 0; + + /* "gensim/models/fasttext_inner.pyx":325 + * for token in sent: + * word = vlookup[token] if token in vlookup else None + * if word is None: # <<<<<<<<<<<<<< + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + */ + __pyx_t_7 = (__pyx_v_word == Py_None); + __pyx_t_5 = (__pyx_t_7 != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":326 + * word = vlookup[token] if token in vlookup else None + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< + * if sample and word.sample_int < random_int32(&next_random): + * continue + */ + goto __pyx_L11_continue; + + /* "gensim/models/fasttext_inner.pyx":325 + * for token in sent: + * word = vlookup[token] if token in vlookup else None + * if word is None: # <<<<<<<<<<<<<< + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + */ + } + + /* "gensim/models/fasttext_inner.pyx":327 + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[effective_words] = word.index + */ + __pyx_t_7 = (__pyx_v_sample != 0); + if (__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_5 = __pyx_t_7; + __pyx_L15_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":328 + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[effective_words] = word.index + * + */ + goto __pyx_L11_continue; + + /* "gensim/models/fasttext_inner.pyx":327 + * if word is None: + * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[effective_words] = word.index + */ + } + + /* "gensim/models/fasttext_inner.pyx":329 + * if sample and word.sample_int < random_int32(&next_random): + * continue + * indexes[effective_words] = word.index # <<<<<<<<<<<<<< + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + */ + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; + + /* "gensim/models/fasttext_inner.pyx":331 + * indexes[effective_words] = word.index + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + */ + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { + __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; + __pyx_t_19 = NULL; + } else { + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + for (;;) { + if (likely(!__pyx_t_19)) { + if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 331, __pyx_L1_error) + #else + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + #endif + } else { + if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 331, __pyx_L1_error) + #else + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + #endif + } + } else { + __pyx_t_13 = __pyx_t_19(__pyx_t_17); + if (unlikely(!__pyx_t_13)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 331, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_13); + } + __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); + __pyx_t_13 = 0; + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + } + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); + __pyx_t_14 = 0; + + /* "gensim/models/fasttext_inner.pyx":332 + * + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< + * subwords_idx_len[effective_words] = (len(subwords) + 1) + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + */ + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_14); + PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + __pyx_t_14 = 0; + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + __Pyx_GIVEREF(__pyx_t_14); + PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); + __pyx_t_14 = 0; + __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 332, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); + __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":333 + * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * # ensures reference count of word_subwords doesn't reach 0 + */ + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 333, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); + + /* "gensim/models/fasttext_inner.pyx":334 + * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = (len(subwords) + 1) + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< + * # ensures reference count of word_subwords doesn't reach 0 + * subword_arrays[effective_words] = word_subwords + */ + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 334, __pyx_L1_error) + (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); + + /* "gensim/models/fasttext_inner.pyx":336 + * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * # ensures reference count of word_subwords doesn't reach 0 + * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< + * + * if hs: + */ + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 336, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 336, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":338 + * subword_arrays[effective_words] = word_subwords + * + * if hs: # <<<<<<<<<<<<<< + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":339 + * + * if hs: + * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< + * codes[effective_words] = np.PyArray_DATA(word.code) + * points[effective_words] = np.PyArray_DATA(word.point) + */ + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 339, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 339, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); + + /* "gensim/models/fasttext_inner.pyx":340 + * if hs: + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[effective_words] = np.PyArray_DATA(word.point) + * + */ + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 340, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 340, __pyx_L1_error) + (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":341 + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * + * effective_words += 1 + */ + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 341, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 341, __pyx_L1_error) + (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":338 + * subword_arrays[effective_words] = word_subwords + * + * if hs: # <<<<<<<<<<<<<< + * codelens[effective_words] = len(word.code) + * codes[effective_words] = np.PyArray_DATA(word.code) + */ + } + + /* "gensim/models/fasttext_inner.pyx":343 + * points[effective_words] = np.PyArray_DATA(word.point) + * + * effective_words += 1 # <<<<<<<<<<<<<< + * if effective_words == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_effective_words = (__pyx_v_effective_words + 1); + + /* "gensim/models/fasttext_inner.pyx":344 + * + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * + */ + __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":345 + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * + * # keep track of which words go into which sentence, so we don't train + */ + goto __pyx_L12_break; + + /* "gensim/models/fasttext_inner.pyx":344 + * + * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * + */ + } + + /* "gensim/models/fasttext_inner.pyx":323 + * if not sent: + * continue # ignore empty sentences; leave effective_sentences unchanged + * for token in sent: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + __pyx_L11_continue:; + } + __pyx_L12_break:; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + + /* "gensim/models/fasttext_inner.pyx":350 + * # across sentence boundaries. + * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 357, __pyx_L1_error) + } + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_16))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + } + } else { + __pyx_t_3 = __pyx_t_10(__pyx_t_16); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else __PYX_ERR(0, 357, __pyx_L1_error) + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_v_i = __pyx_t_2; + __pyx_t_2 = (__pyx_t_2 + 1); + + /* "gensim/models/fasttext_inner.pyx":358 + * # precompute "reduced window" offsets in a single randint() call + * for i, item in enumerate(model.random.randint(0, window, effective_words)): + * reduced_windows[i] = item # <<<<<<<<<<<<<< + * + * with nogil: + */ + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 358, __pyx_L1_error) + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; + + /* "gensim/models/fasttext_inner.pyx":357 + * + * # precompute "reduced window" offsets in a single randint() call + * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item + * + */ + } + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":360 + * reduced_windows[i] = item + * + * with nogil: # <<<<<<<<<<<<<< + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + */ + { + #ifdef WITH_THREAD + PyThreadState *_save; + Py_UNBLOCK_THREADS + __Pyx_FastGIL_Remember(); + #endif + /*try:*/ { + + /* "gensim/models/fasttext_inner.pyx":361 + * + * with nogil: + * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] + */ + __pyx_t_2 = __pyx_v_effective_sentences; + for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { + __pyx_v_sent_idx = __pyx_t_20; + + /* "gensim/models/fasttext_inner.pyx":362 + * with nogil: + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): + */ + __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); + + /* "gensim/models/fasttext_inner.pyx":363 + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + */ + __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); + + /* "gensim/models/fasttext_inner.pyx":364 + * idx_start = sentence_idx[sent_idx] + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< + * j = i - window + reduced_windows[i] + * if j < idx_start: + */ + __pyx_t_21 = __pyx_v_idx_end; + for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { + __pyx_v_i = __pyx_t_22; + + /* "gensim/models/fasttext_inner.pyx":365 + * idx_end = sentence_idx[sent_idx + 1] + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< + * if j < idx_start: + * j = idx_start + */ + __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); + + /* "gensim/models/fasttext_inner.pyx":366 + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + * if j < idx_start: # <<<<<<<<<<<<<< + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + */ + __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":367 + * j = i - window + reduced_windows[i] + * if j < idx_start: + * j = idx_start # <<<<<<<<<<<<<< + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: + */ + __pyx_v_j = __pyx_v_idx_start; + + /* "gensim/models/fasttext_inner.pyx":366 + * for i in range(idx_start, idx_end): + * j = i - window + reduced_windows[i] + * if j < idx_start: # <<<<<<<<<<<<<< + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + */ + } + + /* "gensim/models/fasttext_inner.pyx":368 + * if j < idx_start: + * j = idx_start + * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< + * if k > idx_end: + * k = idx_end + */ + __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); + + /* "gensim/models/fasttext_inner.pyx":369 + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: # <<<<<<<<<<<<<< + * k = idx_end + * for j in range(j, k): + */ + __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":370 + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: + * k = idx_end # <<<<<<<<<<<<<< + * for j in range(j, k): + * if j == i: + */ + __pyx_v_k = __pyx_v_idx_end; + + /* "gensim/models/fasttext_inner.pyx":369 + * j = idx_start + * k = i + window + 1 - reduced_windows[i] + * if k > idx_end: # <<<<<<<<<<<<<< + * k = idx_end + * for j in range(j, k): + */ + } + + /* "gensim/models/fasttext_inner.pyx":371 + * if k > idx_end: + * k = idx_end + * for j in range(j, k): # <<<<<<<<<<<<<< + * if j == i: + * continue + */ + __pyx_t_23 = __pyx_v_k; + for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { + __pyx_v_j = __pyx_t_24; + + /* "gensim/models/fasttext_inner.pyx":372 + * k = idx_end + * for j in range(j, k): + * if j == i: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":373 + * for j in range(j, k): + * if j == i: + * continue # <<<<<<<<<<<<<< + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + */ + goto __pyx_L33_continue; + + /* "gensim/models/fasttext_inner.pyx":372 + * k = idx_end + * for j in range(j, k): + * if j == i: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + } + + /* "gensim/models/fasttext_inner.pyx":374 + * if j == i: + * continue + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + */ + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":375 + * continue + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< + * if negative: + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + */ + __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); + + /* "gensim/models/fasttext_inner.pyx":374 + * if j == i: + * continue + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + */ + } + + /* "gensim/models/fasttext_inner.pyx":376 + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + */ + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { + + /* "gensim/models/fasttext_inner.pyx":377 + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< + * + * return effective_words + */ + __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); + + /* "gensim/models/fasttext_inner.pyx":376 + * if hs: + * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + */ + } + __pyx_L33_continue:; + } + } + } + } + + /* "gensim/models/fasttext_inner.pyx":360 + * reduced_windows[i] = item + * + * with nogil: # <<<<<<<<<<<<<< + * for sent_idx in range(effective_sentences): + * idx_start = sentence_idx[sent_idx] + */ + /*finally:*/ { + /*normal exit:*/{ + #ifdef WITH_THREAD + __Pyx_FastGIL_Forget(); + Py_BLOCK_THREADS + #endif + goto __pyx_L26; + } + __pyx_L26:; + } + } + + /* "gensim/models/fasttext_inner.pyx":379 + * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * + * return effective_words # <<<<<<<<<<<<<< + * + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 379, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + __pyx_r = __pyx_t_16; + __pyx_t_16 = 0; + goto __pyx_L0; + + /* "gensim/models/fasttext_inner.pyx":258 + * + * + * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_13); + __Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_16); + __Pyx_XDECREF(__pyx_t_17); + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF(__pyx_v_subword_arrays); + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_sent); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_subwords); + __Pyx_XDECREF(__pyx_v_word_subwords); + __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_subword_i); + __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__l1) { +/* "gensim/models/fasttext_inner.pyx":382 + * + * + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow = {"train_batch_cbow", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_model = 0; + PyObject *__pyx_v_sentences = 0; + PyObject *__pyx_v_alpha = 0; + PyObject *__pyx_v__work = 0; + PyObject *__pyx_v__neu1 = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("train_batch_cbow (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentences,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,0}; + PyObject* values[5] = {0,0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + CYTHON_FALLTHROUGH; + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + CYTHON_FALLTHROUGH; + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + CYTHON_FALLTHROUGH; + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 382, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 382, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 3: + if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 382, __pyx_L3_error) + } + CYTHON_FALLTHROUGH; + case 4: + if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 382, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 382, __pyx_L3_error) + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + } + __pyx_v_model = values[0]; + __pyx_v_sentences = values[1]; + __pyx_v_alpha = values[2]; + __pyx_v__work = values[3]; + __pyx_v__neu1 = values[4]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 382, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(__pyx_self, __pyx_v_model, __pyx_v_sentences, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentences, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v_sample; + int __pyx_v_cbow_mean; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_l1; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[0x2710]; @@ -2692,6 +5158,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON unsigned PY_LONG_LONG __pyx_v_next_random; int __pyx_v_subwords_idx_len[0x2710]; __pyx_t_5numpy_uint32_t *__pyx_v_subwords_idx[0x2710]; + PyObject *__pyx_v_subword_arrays = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1; PyObject *__pyx_v_vlookup = NULL; PyObject *__pyx_v_sent = NULL; PyObject *__pyx_v_token = NULL; @@ -2724,146 +5192,157 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON int __pyx_t_20; int __pyx_t_21; int __pyx_t_22; - int __pyx_t_23; - int __pyx_t_24; - __Pyx_RefNannySetupContext("train_batch_sg", 0); + __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":131 + /* "gensim/models/fasttext_inner.pyx":383 * - * def train_batch_sg(model, sentences, alpha, _work, _l1): + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 131, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 131, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":132 - * def train_batch_sg(model, sentences, alpha, _work, _l1): + /* "gensim/models/fasttext_inner.pyx":384 + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) - * + * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 132, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 132, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":133 + /* "gensim/models/fasttext_inner.pyx":385 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * cdef int cbow_mean = model.cbow_mean * - * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 133, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 133, __pyx_L1_error) + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 133, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":135 + /* "gensim/models/fasttext_inner.pyx":386 + * cdef int negative = model.negative * cdef int sample = (model.sample != 0) + * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< + * + * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 386, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_cbow_mean = __pyx_t_2; + + /* "gensim/models/fasttext_inner.pyx":388 + * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 135, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 135, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 135, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":136 + /* "gensim/models/fasttext_inner.pyx":389 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 136, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 136, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":137 + /* "gensim/models/fasttext_inner.pyx":390 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 137, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 390, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 137, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 390, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 137, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 390, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":138 + /* "gensim/models/fasttext_inner.pyx":391 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 138, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 391, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":143 - * cdef REAL_t *l1 + /* "gensim/models/fasttext_inner.pyx":394 * + * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 143, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 394, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":144 - * + /* "gensim/models/fasttext_inner.pyx":395 + * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 144, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 395, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 144, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 395, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":150 + /* "gensim/models/fasttext_inner.pyx":401 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 150, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 401, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 150, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 401, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":153 + /* "gensim/models/fasttext_inner.pyx":404 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -2873,8 +5352,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":173 + /* "gensim/models/fasttext_inner.pyx":424 + * # dummy dictionary to ensure that the memory locations that subwords_idx point to + * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager + * subword_arrays = {} # <<<<<<<<<<<<<< * + * if hs: + */ + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); + __pyx_t_3 = 0; + + /* "gensim/models/fasttext_inner.pyx":426 + * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -2883,21 +5374,21 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":174 + /* "gensim/models/fasttext_inner.pyx":427 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 174, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":173 - * + /* "gensim/models/fasttext_inner.pyx":426 + * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -2905,7 +5396,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":429 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -2915,46 +5406,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":177 + /* "gensim/models/fasttext_inner.pyx":430 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 177, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 177, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 430, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":431 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 178, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 178, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 431, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":179 + /* "gensim/models/fasttext_inner.pyx":432 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":429 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -2963,7 +5454,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":433 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -2981,41 +5472,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":181 + /* "gensim/models/fasttext_inner.pyx":434 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":433 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3024,42 +5515,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":437 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * l1 = np.PyArray_DATA(_l1) + * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 184, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 437, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":185 + /* "gensim/models/fasttext_inner.pyx":438 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) - * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< + * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 185, __pyx_L1_error) - __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 438, __pyx_L1_error) + __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":441 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 188, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 188, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":189 + /* "gensim/models/fasttext_inner.pyx":442 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -3068,7 +5559,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":443 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -3079,26 +5570,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 443, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 190, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -3108,7 +5599,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 190, __pyx_L1_error) + else __PYX_ERR(0, 443, __pyx_L1_error) } break; } @@ -3117,18 +5608,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":191 + /* "gensim/models/fasttext_inner.pyx":444 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 191, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":445 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -3137,7 +5628,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":191 + /* "gensim/models/fasttext_inner.pyx":444 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -3146,7 +5637,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":193 + /* "gensim/models/fasttext_inner.pyx":446 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -3157,26 +5648,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 446, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -3186,7 +5677,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 193, __pyx_L1_error) + else __PYX_ERR(0, 446, __pyx_L1_error) } break; } @@ -3195,16 +5686,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":447 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 194, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 447, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 194, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 447, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -3215,7 +5706,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":195 + /* "gensim/models/fasttext_inner.pyx":448 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -3226,7 +5717,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":196 + /* "gensim/models/fasttext_inner.pyx":449 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -3235,7 +5726,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":195 + /* "gensim/models/fasttext_inner.pyx":448 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -3244,7 +5735,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":450 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -3257,20 +5748,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":198 + /* "gensim/models/fasttext_inner.pyx":451 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -3279,7 +5770,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":450 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -3288,45 +5779,45 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":199 + /* "gensim/models/fasttext_inner.pyx":452 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 199, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 199, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":201 + /* "gensim/models/fasttext_inner.pyx":454 * indexes[effective_words] = word.index * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -3334,9 +5825,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; __pyx_t_19 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -3344,17 +5835,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_17))) { if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -3364,7 +5855,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 201, __pyx_L1_error) + else __PYX_ERR(0, 454, __pyx_L1_error) } break; } @@ -3372,58 +5863,58 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 201, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 201, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":202 + /* "gensim/models/fasttext_inner.pyx":455 * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 202, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 202, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 455, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; @@ -3431,28 +5922,40 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":203 + /* "gensim/models/fasttext_inner.pyx":456 * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) - * + * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 203, __pyx_L1_error) + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 456, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); - /* "gensim/models/fasttext_inner.pyx":204 + /* "gensim/models/fasttext_inner.pyx":457 * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< - * - * if hs: + * # ensures reference count of word_subwords doesn't reach 0 + * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 204, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 457, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":206 + /* "gensim/models/fasttext_inner.pyx":459 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + * # ensures reference count of word_subwords doesn't reach 0 + * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< + * + * if hs: + */ + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 459, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_16); + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + + /* "gensim/models/fasttext_inner.pyx":461 + * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< * codelens[effective_words] = len(word.code) @@ -3461,47 +5964,47 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":207 + /* "gensim/models/fasttext_inner.pyx":462 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 207, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 207, __pyx_L1_error) + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":208 + /* "gensim/models/fasttext_inner.pyx":463 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) - * + * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 208, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 463, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 208, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 463, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":209 + /* "gensim/models/fasttext_inner.pyx":464 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * * effective_words += 1 + * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 209, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 464, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 209, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 464, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":206 - * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + /* "gensim/models/fasttext_inner.pyx":461 + * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< * codelens[effective_words] = len(word.code) @@ -3509,17 +6012,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":211 + /* "gensim/models/fasttext_inner.pyx":465 + * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) - * * effective_words += 1 # <<<<<<<<<<<<<< * if effective_words == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":212 - * + /* "gensim/models/fasttext_inner.pyx":466 + * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< * break # TODO: log warning, tally overflow? @@ -3528,7 +6031,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":213 + /* "gensim/models/fasttext_inner.pyx":467 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< @@ -3537,8 +6040,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":212 - * + /* "gensim/models/fasttext_inner.pyx":466 + * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< * break # TODO: log warning, tally overflow? @@ -3546,7 +6049,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":193 + /* "gensim/models/fasttext_inner.pyx":446 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -3558,7 +6061,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":218 + /* "gensim/models/fasttext_inner.pyx":472 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 479, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -3699,17 +6202,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_16))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 479, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 479, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -3719,7 +6222,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 225, __pyx_L1_error) + else __PYX_ERR(0, 479, __pyx_L1_error) } break; } @@ -3730,17 +6233,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":226 + /* "gensim/models/fasttext_inner.pyx":480 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * with nogil: + * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 226, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 480, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":225 + /* "gensim/models/fasttext_inner.pyx":479 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -3750,9 +6253,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":228 - * reduced_windows[i] = item + /* "gensim/models/fasttext_inner.pyx":483 * + * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] @@ -3765,8 +6268,8 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":229 - * + /* "gensim/models/fasttext_inner.pyx":484 + * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< * idx_start = sentence_idx[sent_idx] @@ -3776,7 +6279,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":230 + /* "gensim/models/fasttext_inner.pyx":485 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -3785,7 +6288,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":231 + /* "gensim/models/fasttext_inner.pyx":486 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -3794,7 +6297,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":232 + /* "gensim/models/fasttext_inner.pyx":487 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -3805,7 +6308,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":233 + /* "gensim/models/fasttext_inner.pyx":488 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3814,7 +6317,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":489 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -3824,7 +6327,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":235 + /* "gensim/models/fasttext_inner.pyx":490 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -3833,7 +6336,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":489 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -3842,7 +6345,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":491 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3851,137 +6354,96 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":492 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< * k = idx_end - * for j in range(j, k): + * */ __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":493 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< - * for j in range(j, k): - * if j == i: + * + * if hs: */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":492 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< * k = idx_end - * for j in range(j, k): + * */ } - /* "gensim/models/fasttext_inner.pyx":239 - * if k > idx_end: - * k = idx_end - * for j in range(j, k): # <<<<<<<<<<<<<< - * if j == i: - * continue - */ - __pyx_t_23 = __pyx_v_k; - for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { - __pyx_v_j = __pyx_t_24; - - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":495 * k = idx_end - * for j in range(j, k): - * if j == i: # <<<<<<<<<<<<<< - * continue - * if hs: + * + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) */ - __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":241 - * for j in range(j, k): - * if j == i: - * continue # <<<<<<<<<<<<<< - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":496 + * + * if hs: + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, # <<<<<<<<<<<<<< + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * if negative: */ - goto __pyx_L33_continue; + __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":495 * k = idx_end - * for j in range(j, k): - * if j == i: # <<<<<<<<<<<<<< - * continue - * if hs: - */ - } - - /* "gensim/models/fasttext_inner.pyx":242 - * if j == i: - * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: - */ - __pyx_t_5 = (__pyx_v_hs != 0); - if (__pyx_t_5) { - - /* "gensim/models/fasttext_inner.pyx":243 - * continue - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< - * if negative: - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) - */ - __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - - /* "gensim/models/fasttext_inner.pyx":242 - * if j == i: - * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: + * + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) */ - } + } - /* "gensim/models/fasttext_inner.pyx":244 - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) - * + /* "gensim/models/fasttext_inner.pyx":498 + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, + * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) */ - __pyx_t_5 = (__pyx_v_negative != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":245 - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":499 + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * if negative: + * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, # <<<<<<<<<<<<<< + * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * - * return effective_words */ - __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); + __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":244 - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) - * + /* "gensim/models/fasttext_inner.pyx":498 + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, + * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, + * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) */ - } - __pyx_L33_continue:; } } } } - /* "gensim/models/fasttext_inner.pyx":228 - * reduced_windows[i] = item + /* "gensim/models/fasttext_inner.pyx":483 * + * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] @@ -3998,24 +6460,24 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":247 - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":502 + * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 247, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 502, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __pyx_r = __pyx_t_16; __pyx_t_16 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":130 - * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + /* "gensim/models/fasttext_inner.pyx":382 * - * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< + * + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ @@ -4029,9 +6491,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); - __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_subword_arrays); __Pyx_XDECREF(__pyx_v_vlookup); __Pyx_XDECREF(__pyx_v_sent); __Pyx_XDECREF(__pyx_v_token); @@ -4045,7 +6508,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":250 +/* "gensim/models/fasttext_inner.pyx":505 * * * def init(): # <<<<<<<<<<<<<< @@ -4054,21 +6517,21 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ /* Python wrapper */ -static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_6gensim_6models_14fasttext_inner_2init[] = "\n Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized\n into table EXP_TABLE. Also calculate log(sigmoid(x)) into LOG_TABLE.\n "; -static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_3init = {"init", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_3init, METH_NOARGS, __pyx_doc_6gensim_6models_14fasttext_inner_2init}; -static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_5init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_6gensim_6models_14fasttext_inner_4init[] = "\n Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized\n into table EXP_TABLE. Also calculate log(sigmoid(x)) into LOG_TABLE.\n "; +static PyMethodDef __pyx_mdef_6gensim_6models_14fasttext_inner_5init = {"init", (PyCFunction)__pyx_pw_6gensim_6models_14fasttext_inner_5init, METH_NOARGS, __pyx_doc_6gensim_6models_14fasttext_inner_4init}; +static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_5init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("init (wrapper)", 0); - __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_2init(__pyx_self); + __pyx_r = __pyx_pf_6gensim_6models_14fasttext_inner_4init(__pyx_self); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED PyObject *__pyx_self) { +static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED PyObject *__pyx_self) { int __pyx_v_i; float *__pyx_v_x; float *__pyx_v_y; @@ -4081,11 +6544,10 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P float __pyx_t_1[1]; float __pyx_t_2[1]; int __pyx_t_3; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; - int __pyx_t_5; + int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":259 + /* "gensim/models/fasttext_inner.pyx":514 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -4095,7 +6557,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":260 + /* "gensim/models/fasttext_inner.pyx":515 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -4105,7 +6567,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":261 + /* "gensim/models/fasttext_inner.pyx":516 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -4114,7 +6576,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":262 + /* "gensim/models/fasttext_inner.pyx":517 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -4123,7 +6585,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":267 + /* "gensim/models/fasttext_inner.pyx":522 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -4133,7 +6595,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":268 + /* "gensim/models/fasttext_inner.pyx":523 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -4142,21 +6604,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":269 + /* "gensim/models/fasttext_inner.pyx":524 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< * LOG_TABLE[i] = log( EXP_TABLE[i] ) * */ - __pyx_t_4 = ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0); - if (unlikely(__pyx_t_4 == 0)) { - PyErr_SetString(PyExc_ZeroDivisionError, "float division"); - __PYX_ERR(0, 269, __pyx_L1_error) - } - (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / __pyx_t_4)); + (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":270 + /* "gensim/models/fasttext_inner.pyx":525 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -4166,7 +6623,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":273 + /* "gensim/models/fasttext_inner.pyx":528 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -4175,7 +6632,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":274 + /* "gensim/models/fasttext_inner.pyx":529 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -4184,17 +6641,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":275 + /* "gensim/models/fasttext_inner.pyx":530 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< * our_dot = our_dot_double * our_saxpy = saxpy */ - __pyx_t_5 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); - if (__pyx_t_5) { + __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); + if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":276 + /* "gensim/models/fasttext_inner.pyx":531 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -4203,7 +6660,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":277 + /* "gensim/models/fasttext_inner.pyx":532 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4212,7 +6669,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":278 + /* "gensim/models/fasttext_inner.pyx":533 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -4224,7 +6681,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":275 + /* "gensim/models/fasttext_inner.pyx":530 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4233,17 +6690,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":279 + /* "gensim/models/fasttext_inner.pyx":534 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< * our_dot = our_dot_float * our_saxpy = saxpy */ - __pyx_t_5 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); - if (__pyx_t_5) { + __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); + if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":280 + /* "gensim/models/fasttext_inner.pyx":535 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -4252,7 +6709,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":281 + /* "gensim/models/fasttext_inner.pyx":536 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4261,7 +6718,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":282 + /* "gensim/models/fasttext_inner.pyx":537 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -4273,7 +6730,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":279 + /* "gensim/models/fasttext_inner.pyx":534 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4282,7 +6739,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":286 + /* "gensim/models/fasttext_inner.pyx":541 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -4292,7 +6749,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":287 + /* "gensim/models/fasttext_inner.pyx":542 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -4301,7 +6758,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":288 + /* "gensim/models/fasttext_inner.pyx":543 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -4314,7 +6771,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":505 * * * def init(): # <<<<<<<<<<<<<< @@ -4323,9 +6780,6 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2init(CYTHON_UNUSED P */ /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("gensim.models.fasttext_inner.init", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); @@ -4501,7 +6955,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 235, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 235, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -4557,7 +7011,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * info.buf = PyArray_DATA(self) */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 239, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 239, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -4866,7 +7320,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 276, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5755,7 +8209,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 823, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 823, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5823,7 +8277,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 827, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 827, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5932,7 +8386,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 847, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 847, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -6612,7 +9066,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { * * cdef inline int import_umath() except -1: */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1013, __pyx_L5_except_error) + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1013, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -6741,7 +9195,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { * * cdef inline int import_ufunc() except -1: */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1019, __pyx_L5_except_error) + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__12, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1019, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -6867,7 +9321,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< */ - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1025, __pyx_L5_except_error) + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1025, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -6960,10 +9414,11 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_REAL, __pyx_k_REAL, sizeof(__pyx_k_REAL), 0, 0, 1, 1}, {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, - {&__pyx_n_s__12, __pyx_k__12, sizeof(__pyx_k__12), 0, 0, 1, 1}, + {&__pyx_n_s__14, __pyx_k__14, sizeof(__pyx_k__14), 0, 0, 1, 1}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, {&__pyx_n_s_array, __pyx_k_array, sizeof(__pyx_k_array), 0, 0, 1, 1}, + {&__pyx_n_s_cbow_mean, __pyx_k_cbow_mean, sizeof(__pyx_k_cbow_mean), 0, 0, 1, 1}, {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, @@ -7002,6 +9457,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, {&__pyx_n_s_negative, __pyx_k_negative, sizeof(__pyx_k_negative), 0, 0, 1, 1}, + {&__pyx_n_s_neu1, __pyx_k_neu1, sizeof(__pyx_k_neu1), 0, 0, 1, 1}, + {&__pyx_n_s_neu1_2, __pyx_k_neu1_2, sizeof(__pyx_k_neu1_2), 0, 0, 1, 1}, {&__pyx_n_s_next_random, __pyx_k_next_random, sizeof(__pyx_k_next_random), 0, 0, 1, 1}, {&__pyx_n_s_ngrams, __pyx_k_ngrams, sizeof(__pyx_k_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_ngrams_word, __pyx_k_ngrams_word, sizeof(__pyx_k_ngrams_word), 0, 0, 1, 1}, @@ -7024,6 +9481,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_sentence_idx, __pyx_k_sentence_idx, sizeof(__pyx_k_sentence_idx), 0, 0, 1, 1}, {&__pyx_n_s_sentences, __pyx_k_sentences, sizeof(__pyx_k_sentences), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, + {&__pyx_n_s_subword_arrays, __pyx_k_subword_arrays, sizeof(__pyx_k_subword_arrays), 0, 0, 1, 1}, {&__pyx_n_s_subword_i, __pyx_k_subword_i, sizeof(__pyx_k_subword_i), 0, 0, 1, 1}, {&__pyx_n_s_subwords, __pyx_k_subwords, sizeof(__pyx_k_subwords), 0, 0, 1, 1}, {&__pyx_n_s_subwords_idx, __pyx_k_subwords_idx, sizeof(__pyx_k_subwords_idx), 0, 0, 1, 1}, @@ -7036,6 +9494,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, + {&__pyx_n_s_train_batch_cbow, __pyx_k_train_batch_cbow, sizeof(__pyx_k_train_batch_cbow), 0, 0, 1, 1}, {&__pyx_n_s_train_batch_sg, __pyx_k_train_batch_sg, sizeof(__pyx_k_train_batch_sg), 0, 0, 1, 1}, {&__pyx_n_s_uint32, __pyx_k_uint32, sizeof(__pyx_k_uint32), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, @@ -7055,9 +9514,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 19, __pyx_L1_error) - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 63, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 22, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 67, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 357, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) return 0; @@ -7069,20 +9528,34 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/fasttext_inner.pyx":181 + /* "gensim/models/fasttext_inner.pyx":311 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 181, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); + /* "gensim/models/fasttext_inner.pyx":434 + * cum_table_len = len(model.cum_table) + * if negative or sample: + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 434, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__3); + __Pyx_GIVEREF(__pyx_tuple__3); + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 434, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): @@ -7090,9 +9563,9 @@ static int __Pyx_InitCachedConstants(void) { * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ - __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(1, 235, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__3); - __Pyx_GIVEREF(__pyx_tuple__3); + __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(1, 235, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__5); + __Pyx_GIVEREF(__pyx_tuple__5); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) @@ -7101,9 +9574,9 @@ static int __Pyx_InitCachedConstants(void) { * * info.buf = PyArray_DATA(self) */ - __pyx_tuple__4 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(1, 239, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__4); - __Pyx_GIVEREF(__pyx_tuple__4); + __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(1, 239, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * if ((descr.byteorder == c'>' and little_endian) or @@ -7112,9 +9585,9 @@ static int __Pyx_InitCachedConstants(void) { * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(1, 276, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__5); - __Pyx_GIVEREF(__pyx_tuple__5); + __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 276, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * @@ -7123,9 +9596,9 @@ static int __Pyx_InitCachedConstants(void) { * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(1, 823, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__6); - __Pyx_GIVEREF(__pyx_tuple__6); + __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(1, 823, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__8); + __Pyx_GIVEREF(__pyx_tuple__8); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":827 * if ((child.byteorder == c'>' and little_endian) or @@ -7134,9 +9607,9 @@ static int __Pyx_InitCachedConstants(void) { * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 827, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__7); - __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * t = child.type_num @@ -7145,9 +9618,9 @@ static int __Pyx_InitCachedConstants(void) { * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(1, 847, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__8); - __Pyx_GIVEREF(__pyx_tuple__8); + __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(1, 847, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__10); + __Pyx_GIVEREF(__pyx_tuple__10); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1013 * _import_array() @@ -7156,9 +9629,9 @@ static int __Pyx_InitCachedConstants(void) { * * cdef inline int import_umath() except -1: */ - __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 1013, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(1, 1013, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1019 * _import_umath() @@ -7167,42 +9640,54 @@ static int __Pyx_InitCachedConstants(void) { * * cdef inline int import_ufunc() except -1: */ - __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(1, 1019, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__10); - __Pyx_GIVEREF(__pyx_tuple__10); + __pyx_tuple__12 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__12)) __PYX_ERR(1, 1019, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__12); + __Pyx_GIVEREF(__pyx_tuple__12); /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1025 * _import_umath() * except Exception: * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< */ - __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(1, 1025, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__11); - __Pyx_GIVEREF(__pyx_tuple__11); + __pyx_tuple__13 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(1, 1025, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__13); + __Pyx_GIVEREF(__pyx_tuple__13); - /* "gensim/models/fasttext_inner.pyx":130 - * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + /* "gensim/models/fasttext_inner.pyx":258 + * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__13)) __PYX_ERR(0, 130, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__13); - __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 46, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 130, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) __PYX_ERR(0, 130, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 258, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__15); + __Pyx_GIVEREF(__pyx_tuple__15); + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 258, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 258, __pyx_L1_error) + + /* "gensim/models/fasttext_inner.pyx":382 + * + * + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 382, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__17); + __Pyx_GIVEREF(__pyx_tuple__17); + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 382, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 382, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":505 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 250, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__15); - __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 505, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__19); + __Pyx_GIVEREF(__pyx_tuple__19); + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 505, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7408,40 +9893,40 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) #endif - /* "gensim/models/fasttext_inner.pyx":5 + /* "gensim/models/fasttext_inner.pyx":8 * * import cython * import numpy as np # <<<<<<<<<<<<<< * cimport numpy as np * */ - __pyx_t_3 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 5, __pyx_L1_error) + __pyx_t_3 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_3) < 0) __PYX_ERR(0, 5, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_3) < 0) __PYX_ERR(0, 8, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":13 + /* "gensim/models/fasttext_inner.pyx":16 * from libc.stdio cimport printf * from libc.stdlib cimport calloc, free * from gensim import matutils # <<<<<<<<<<<<<< * * */ - __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_n_s_matutils); __Pyx_GIVEREF(__pyx_n_s_matutils); PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_matutils); - __pyx_t_4 = __Pyx_Import(__pyx_n_s_gensim, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 13, __pyx_L1_error) + __pyx_t_4 = __Pyx_Import(__pyx_n_s_gensim, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_matutils); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_matutils); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_matutils, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_matutils, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/fasttext_inner.pyx":17 + /* "gensim/models/fasttext_inner.pyx":20 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< @@ -7457,28 +9942,28 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __Pyx_XGOTREF(__pyx_t_7); /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":18 + /* "gensim/models/fasttext_inner.pyx":21 * # scipy <= 0.15 * try: * from scipy.linalg.blas import fblas # <<<<<<<<<<<<<< * except ImportError: * # in scipy > 0.15, fblas function has been removed */ - __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L2_error) + __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L2_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_n_s_fblas); __Pyx_GIVEREF(__pyx_n_s_fblas); PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_fblas); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_4, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 18, __pyx_L2_error) + __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_4, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L2_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_fblas); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 18, __pyx_L2_error) + __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_fblas); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L2_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_4) < 0) __PYX_ERR(0, 18, __pyx_L2_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_4) < 0) __PYX_ERR(0, 21, __pyx_L2_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":17 + /* "gensim/models/fasttext_inner.pyx":20 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< @@ -7494,7 +9979,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":19 + /* "gensim/models/fasttext_inner.pyx":22 * try: * from scipy.linalg.blas import fblas * except ImportError: # <<<<<<<<<<<<<< @@ -7504,27 +9989,27 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __pyx_t_8 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_ImportError); if (__pyx_t_8) { __Pyx_AddTraceback("gensim.models.fasttext_inner", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_4, &__pyx_t_9) < 0) __PYX_ERR(0, 19, __pyx_L4_except_error) + if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_4, &__pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GOTREF(__pyx_t_4); __Pyx_GOTREF(__pyx_t_9); - /* "gensim/models/fasttext_inner.pyx":21 + /* "gensim/models/fasttext_inner.pyx":24 * except ImportError: * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * * from word2vec_inner cimport bisect_left, random_int32, \ */ - __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 21, __pyx_L4_except_error) + __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 24, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); - __Pyx_INCREF(__pyx_n_s__12); - __Pyx_GIVEREF(__pyx_n_s__12); - PyList_SET_ITEM(__pyx_t_10, 0, __pyx_n_s__12); - __pyx_t_11 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_10, -1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 21, __pyx_L4_except_error) + __Pyx_INCREF(__pyx_n_s__14); + __Pyx_GIVEREF(__pyx_n_s__14); + PyList_SET_ITEM(__pyx_t_10, 0, __pyx_n_s__14); + __pyx_t_11 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_10, -1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 24, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_11) < 0) __PYX_ERR(0, 21, __pyx_L4_except_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_11) < 0) __PYX_ERR(0, 24, __pyx_L4_except_error) __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -7534,7 +10019,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) goto __pyx_L4_except_error; __pyx_L4_except_error:; - /* "gensim/models/fasttext_inner.pyx":17 + /* "gensim/models/fasttext_inner.pyx":20 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< @@ -7554,43 +10039,43 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __pyx_L7_try_end:; } - /* "gensim/models/fasttext_inner.pyx":29 + /* "gensim/models/fasttext_inner.pyx":32 * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas * * REAL = np.float32 # <<<<<<<<<<<<<< * * DEF MAX_SENTENCE_LEN = 10000 */ - __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 32, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 32, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_4) < 0) __PYX_ERR(0, 29, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_4) < 0) __PYX_ERR(0, 32, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/fasttext_inner.pyx":33 + /* "gensim/models/fasttext_inner.pyx":36 * DEF MAX_SENTENCE_LEN = 10000 * DEF MAX_SUBWORDS = 1000 * from word2vec import FAST_VERSION # <<<<<<<<<<<<<< * * DEF EXP_TABLE_SIZE = 1000 */ - __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 33, __pyx_L1_error) + __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_n_s_FAST_VERSION); __Pyx_GIVEREF(__pyx_n_s_FAST_VERSION); PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_FAST_VERSION); - __pyx_t_9 = __Pyx_Import(__pyx_n_s_word2vec, __pyx_t_4, -1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 33, __pyx_L1_error) + __pyx_t_9 = __Pyx_Import(__pyx_n_s_word2vec, __pyx_t_4, -1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 36, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_9, __pyx_n_s_FAST_VERSION); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 33, __pyx_L1_error) + __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_9, __pyx_n_s_FAST_VERSION); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_4) < 0) __PYX_ERR(0, 33, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_4) < 0) __PYX_ERR(0, 36, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":41 + /* "gensim/models/fasttext_inner.pyx":44 * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -7599,46 +10084,58 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_14fasttext_inner_ONE = 1; - /* "gensim/models/fasttext_inner.pyx":42 + /* "gensim/models/fasttext_inner.pyx":45 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< * - * cdef unsigned long long fast_sentence_sg_neg( + * */ __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/fasttext_inner.pyx":130 - * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + /* "gensim/models/fasttext_inner.pyx":258 + * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 130, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 258, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 258, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + + /* "gensim/models/fasttext_inner.pyx":382 + * + * + * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 130, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":505 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 250, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 250, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":290 + /* "gensim/models/fasttext_inner.pyx":545 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 290, __pyx_L1_error) + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 545, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { @@ -7651,27 +10148,27 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) } } if (__pyx_t_3) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 290, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 545, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 290, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 545, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 290, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 545, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":291 + /* "gensim/models/fasttext_inner.pyx":546 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 291, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 546, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< - * # coding: utf-8 - * + * # cython: boundscheck=False + * # cython: wraparound=False */ __pyx_t_9 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); @@ -7748,72 +10245,6 @@ static PyObject *__Pyx_GetBuiltinName(PyObject *name) { return result; } -/* PyErrFetchRestore */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -} -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -} -#endif - -/* WriteUnraisableException */ -static void __Pyx_WriteUnraisable(const char *name, CYTHON_UNUSED int clineno, - CYTHON_UNUSED int lineno, CYTHON_UNUSED const char *filename, - int full_traceback, CYTHON_UNUSED int nogil) { - PyObject *old_exc, *old_val, *old_tb; - PyObject *ctx; - __Pyx_PyThreadState_declare -#ifdef WITH_THREAD - PyGILState_STATE state; - if (nogil) - state = PyGILState_Ensure(); -#ifdef _MSC_VER - else state = (PyGILState_STATE)-1; -#endif -#endif - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&old_exc, &old_val, &old_tb); - if (full_traceback) { - Py_XINCREF(old_exc); - Py_XINCREF(old_val); - Py_XINCREF(old_tb); - __Pyx_ErrRestore(old_exc, old_val, old_tb); - PyErr_PrintEx(1); - } - #if PY_MAJOR_VERSION < 3 - ctx = PyString_FromString(name); - #else - ctx = PyUnicode_FromString(name); - #endif - __Pyx_ErrRestore(old_exc, old_val, old_tb); - if (!ctx) { - PyErr_WriteUnraisable(Py_None); - } else { - PyErr_WriteUnraisable(ctx); - Py_DECREF(ctx); - } -#ifdef WITH_THREAD - if (nogil) - PyGILState_Release(state); -#endif -} - /* RaiseArgTupleInvalid */ static void __Pyx_RaiseArgtupleInvalid( const char* func_name, @@ -8150,6 +10581,30 @@ static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, P } #endif +/* PyErrFetchRestore */ + #if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + /* RaiseException */ #if PY_MAJOR_VERSION < 3 static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index fb9cb08c1f..6ae1cb7431 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -1,4 +1,7 @@ #!/usr/bin/env cython +# cython: boundscheck=False +# cython: wraparound=False +# cython: cdivision=True # coding: utf-8 import cython @@ -41,6 +44,7 @@ cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE cdef int ONE = 1 cdef REAL_t ONEF = 1.0 + cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, @@ -90,6 +94,7 @@ cdef unsigned long long fast_sentence_sg_neg( our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) return next_random + cdef void fast_sentence_sg_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, @@ -127,6 +132,129 @@ cdef void fast_sentence_sg_hs( for d in range(1, subwords_len): our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) + +cdef unsigned long long fast_sentence_cbow_neg( + const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, int codelens[MAX_SENTENCE_LEN], + REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, + const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t *subwords_idx[MAX_SENTENCE_LEN], + const int subwords_idx_len[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, + int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *word_locks_ngrams, REAL_t *word_locks_vocab) nogil: + + cdef long long a + cdef long long row2 + cdef unsigned long long modulo = 281474976710655ULL + cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot + cdef np.uint32_t target_index, word_index + cdef int d, m + + word_index = indexes[i] + + memset(neu1, 0, size * cython.sizeof(REAL_t)) + count = 0.0 + for m in range(j, k): + if m == i: + continue + else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + for d in range(1, subwords_idx_len[m]): + count += ONEF + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + + if count > (0.5): + inv_count = ONEF/count + if cbow_mean: + sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + + memset(work, 0, size * cython.sizeof(REAL_t)) + + for d in range(negative+1): + if d == 0: + target_index = word_index + label = ONEF + else: + target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + next_random = (next_random * 25214903917ULL + 11) & modulo + if target_index == word_index: + continue + label = 0.0 + + row2 = target_index * size + f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + continue + f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (label - f) * alpha + + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + + if not cbow_mean: # divide error over summed window vectors + sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + + for m in range(j,k): + if m == i: + continue + else: + our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + for d in range(1, subwords_idx_len[m]): + our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + + return next_random + + +cdef void fast_sentence_cbow_hs( + const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, + const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t *subwords_idx[MAX_SENTENCE_LEN], + const int subwords_idx_len[MAX_SENTENCE_LEN],const REAL_t alpha, REAL_t *work, + int i, int j, int k, int cbow_mean, REAL_t *word_locks_ngrams, REAL_t *word_locks_vocab) nogil: + + cdef long long a, b + cdef long long row2, sgn + cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob + cdef int m + + memset(neu1, 0, size * cython.sizeof(REAL_t)) + count = 0.0 + for m in range(j, k): + if m == i: + continue + else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + for d in range(1, subwords_idx_len[m]): + count += ONEF + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + if count > (0.5): + inv_count = ONEF/count + if cbow_mean: + sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + + memset(work, 0, size * cython.sizeof(REAL_t)) + for b in range(codelens[i]): + row2 = word_point[b] * size + f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) + if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: + continue + f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (1 - word_code[b] - f) * alpha + + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + + if not cbow_mean: # divide error over summed window vectors + sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + + for m in range(j,k): + if m == i: + continue + else: + our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + for d in range(1, subwords_idx_len[m]): + our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + + def train_batch_sg(model, sentences, alpha, _work, _l1): cdef int hs = model.hs cdef int negative = model.negative @@ -251,6 +379,129 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): return effective_words +def train_batch_cbow(model, sentences, alpha, _work, _neu1): + cdef int hs = model.hs + cdef int negative = model.negative + cdef int sample = (model.sample != 0) + cdef int cbow_mean = model.cbow_mean + + cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) + cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) + cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) + cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) + + cdef REAL_t *work + cdef REAL_t _alpha = alpha + cdef int size = model.layer1_size + + cdef int codelens[MAX_SENTENCE_LEN] + cdef np.uint32_t indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] + cdef int sentence_idx[MAX_SENTENCE_LEN + 1] + cdef int window = model.window + + cdef int i, j, k + cdef int effective_words = 0, effective_sentences = 0 + cdef int sent_idx, idx_start, idx_end + + # For hierarchical softmax + cdef REAL_t *syn1 + cdef np.uint32_t *points[MAX_SENTENCE_LEN] + cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + + # For negative sampling + cdef REAL_t *syn1neg + cdef np.uint32_t *cum_table + cdef unsigned long long cum_table_len + # for sampling (negative and frequent-word downsampling) + cdef unsigned long long next_random + + # For passing subwords information as C objects for nogil + cdef int subwords_idx_len[MAX_SENTENCE_LEN] + cdef np.uint32_t *subwords_idx[MAX_SENTENCE_LEN] + # dummy dictionary to ensure that the memory locations that subwords_idx point to + # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager + subword_arrays = {} + + if hs: + syn1 = (np.PyArray_DATA(model.syn1)) + + if negative: + syn1neg = (np.PyArray_DATA(model.syn1neg)) + cum_table = (np.PyArray_DATA(model.cum_table)) + cum_table_len = len(model.cum_table) + if negative or sample: + next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + + # convert Python structures to primitive types, so we can release the GIL + work = np.PyArray_DATA(_work) + neu1 = np.PyArray_DATA(_neu1) + + # prepare C structures so we can go "full C" and release the Python GIL + vlookup = model.wv.vocab + sentence_idx[0] = 0 # indices of the first sentence always start at 0 + for sent in sentences: + if not sent: + continue # ignore empty sentences; leave effective_sentences unchanged + for token in sent: + word = vlookup[token] if token in vlookup else None + if word is None: + continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window + if sample and word.sample_int < random_int32(&next_random): + continue + indexes[effective_words] = word.index + + subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] + word_subwords = np.array([word.index] + subwords, dtype=np.uint32) + subwords_idx_len[effective_words] = (len(subwords) + 1) + subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) + # ensures reference count of word_subwords doesn't reach 0 + subword_arrays[effective_words] = word_subwords + + if hs: + codelens[effective_words] = len(word.code) + codes[effective_words] = np.PyArray_DATA(word.code) + points[effective_words] = np.PyArray_DATA(word.point) + effective_words += 1 + if effective_words == MAX_SENTENCE_LEN: + break # TODO: log warning, tally overflow? + + # keep track of which words go into which sentence, so we don't train + # across sentence boundaries. + # indices of sentence number X are between idx_end: + k = idx_end + + if hs: + fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, + subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + if negative: + next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, + subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) + + return effective_words + + def init(): """ Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized From 772267af74e35781f5981ec89ea173e46b3631c4 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 29 Nov 2017 13:18:28 +0530 Subject: [PATCH 09/22] cleans extra variables/values --- gensim/models/fasttext_inner.c | 1444 +++++++++++++++--------------- gensim/models/fasttext_inner.pyx | 18 +- 2 files changed, 715 insertions(+), 747 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 0f33a8ecdc..87a174975b 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -1901,7 +1901,6 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; __pyx_t_5numpy_uint32_t __pyx_v_target_index; int __pyx_v_d; - __pyx_t_5numpy_uint32_t __pyx_v_subword_row; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_norm_factor; unsigned PY_LONG_LONG __pyx_r; __pyx_t_5numpy_uint32_t __pyx_t_1; @@ -1960,7 +1959,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); @@ -1968,8 +1967,8 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len */ __pyx_t_1 = __pyx_v_subwords_len; for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { @@ -1978,33 +1977,24 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /* "gensim/models/fasttext_inner.pyx":68 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) - * cdef REAL_t norm_factor = ONEF / subwords_len - */ - __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); - - /* "gensim/models/fasttext_inner.pyx":69 - * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":70 - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + /* "gensim/models/fasttext_inner.pyx":69 + * for d in range(1, subwords_len): + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< * sscal(&size, &norm_factor, l1 , &ONE) * */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":71 - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + /* "gensim/models/fasttext_inner.pyx":70 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< * @@ -2012,7 +2002,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":73 + /* "gensim/models/fasttext_inner.pyx":72 * sscal(&size, &norm_factor, l1 , &ONE) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2023,7 +2013,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":73 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2033,7 +2023,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_d == 0) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":75 + /* "gensim/models/fasttext_inner.pyx":74 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2042,7 +2032,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":76 + /* "gensim/models/fasttext_inner.pyx":75 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2051,7 +2041,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":73 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2061,7 +2051,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L7; } - /* "gensim/models/fasttext_inner.pyx":78 + /* "gensim/models/fasttext_inner.pyx":77 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2071,7 +2061,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":79 + /* "gensim/models/fasttext_inner.pyx":78 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2080,7 +2070,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":79 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2090,7 +2080,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":81 + /* "gensim/models/fasttext_inner.pyx":80 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2099,7 +2089,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":79 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2108,7 +2098,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":82 + /* "gensim/models/fasttext_inner.pyx":81 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2119,7 +2109,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L7:; - /* "gensim/models/fasttext_inner.pyx":84 + /* "gensim/models/fasttext_inner.pyx":83 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2128,7 +2118,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":85 + /* "gensim/models/fasttext_inner.pyx":84 * * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2137,7 +2127,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":86 + /* "gensim/models/fasttext_inner.pyx":85 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2155,7 +2145,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_bool_binop_done:; if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":87 + /* "gensim/models/fasttext_inner.pyx":86 * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2164,7 +2154,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":86 + /* "gensim/models/fasttext_inner.pyx":85 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2173,7 +2163,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":88 + /* "gensim/models/fasttext_inner.pyx":87 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2182,7 +2172,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":89 + /* "gensim/models/fasttext_inner.pyx":88 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2191,7 +2181,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":90 + /* "gensim/models/fasttext_inner.pyx":89 * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2200,7 +2190,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":91 + /* "gensim/models/fasttext_inner.pyx":90 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2211,7 +2201,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":92 + /* "gensim/models/fasttext_inner.pyx":91 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2220,7 +2210,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":93 + /* "gensim/models/fasttext_inner.pyx":92 * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2231,7 +2221,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":94 + /* "gensim/models/fasttext_inner.pyx":93 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2241,7 +2231,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":95 + /* "gensim/models/fasttext_inner.pyx":94 * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) * return next_random # <<<<<<<<<<<<<< @@ -2264,7 +2254,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":98 +/* "gensim/models/fasttext_inner.pyx":97 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2281,7 +2271,6 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; long __pyx_v_d; - __pyx_t_5numpy_uint32_t __pyx_v_subword_row; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_norm_factor; __pyx_t_5numpy_uint32_t __pyx_t_1; long __pyx_t_2; @@ -2290,7 +2279,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t int __pyx_t_5; int __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":106 + /* "gensim/models/fasttext_inner.pyx":105 * * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -2299,7 +2288,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":107 + /* "gensim/models/fasttext_inner.pyx":106 * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< @@ -2308,7 +2297,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":110 + /* "gensim/models/fasttext_inner.pyx":109 * cdef REAL_t f, g, f_dot, lprob * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2317,7 +2306,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":111 + /* "gensim/models/fasttext_inner.pyx":110 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2326,56 +2315,47 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":113 + /* "gensim/models/fasttext_inner.pyx":112 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":114 + /* "gensim/models/fasttext_inner.pyx":113 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) + * cdef REAL_t norm_factor = ONEF / subwords_len */ __pyx_t_1 = __pyx_v_subwords_len; for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":115 + /* "gensim/models/fasttext_inner.pyx":114 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) - * cdef REAL_t norm_factor = ONEF / subwords_len - */ - __pyx_v_subword_row = ((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size); - - /* "gensim/models/fasttext_inner.pyx":116 - * for d in range(1, subwords_len): - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[__pyx_v_subword_row])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":117 - * subword_row = subwords_index[d] * size - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + /* "gensim/models/fasttext_inner.pyx":115 + * for d in range(1, subwords_len): + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< * sscal(&size, &norm_factor, l1 , &ONE) * */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":118 - * our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + /* "gensim/models/fasttext_inner.pyx":116 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< * @@ -2383,7 +2363,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":120 + /* "gensim/models/fasttext_inner.pyx":118 * sscal(&size, &norm_factor, l1 , &ONE) * * for b in range(codelen): # <<<<<<<<<<<<<< @@ -2394,7 +2374,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_b = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":121 + /* "gensim/models/fasttext_inner.pyx":119 * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2403,7 +2383,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":122 + /* "gensim/models/fasttext_inner.pyx":120 * for b in range(codelen): * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2412,7 +2392,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":123 + /* "gensim/models/fasttext_inner.pyx":121 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2430,7 +2410,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L8_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":124 + /* "gensim/models/fasttext_inner.pyx":122 * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2439,7 +2419,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":123 + /* "gensim/models/fasttext_inner.pyx":121 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2448,7 +2428,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ } - /* "gensim/models/fasttext_inner.pyx":125 + /* "gensim/models/fasttext_inner.pyx":123 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2457,7 +2437,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":126 + /* "gensim/models/fasttext_inner.pyx":124 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2466,7 +2446,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":128 + /* "gensim/models/fasttext_inner.pyx":126 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2475,7 +2455,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":129 + /* "gensim/models/fasttext_inner.pyx":127 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2486,7 +2466,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":131 + /* "gensim/models/fasttext_inner.pyx":129 * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2495,7 +2475,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":132 + /* "gensim/models/fasttext_inner.pyx":130 * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2506,7 +2486,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":133 + /* "gensim/models/fasttext_inner.pyx":131 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2516,7 +2496,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":98 + /* "gensim/models/fasttext_inner.pyx":97 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2527,7 +2507,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":136 +/* "gensim/models/fasttext_inner.pyx":134 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2557,7 +2537,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente long __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":145 + /* "gensim/models/fasttext_inner.pyx":143 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2566,7 +2546,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":146 + /* "gensim/models/fasttext_inner.pyx":144 * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL * cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot # <<<<<<<<<<<<<< @@ -2575,7 +2555,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":150 + /* "gensim/models/fasttext_inner.pyx":148 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< @@ -2584,7 +2564,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "gensim/models/fasttext_inner.pyx":152 + /* "gensim/models/fasttext_inner.pyx":150 * word_index = indexes[i] * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2593,7 +2573,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":153 + /* "gensim/models/fasttext_inner.pyx":151 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2602,7 +2582,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":154 + /* "gensim/models/fasttext_inner.pyx":152 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2613,7 +2593,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":155 + /* "gensim/models/fasttext_inner.pyx":153 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -2623,7 +2603,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":156 + /* "gensim/models/fasttext_inner.pyx":154 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -2632,7 +2612,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":155 + /* "gensim/models/fasttext_inner.pyx":153 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -2641,47 +2621,47 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":158 + /* "gensim/models/fasttext_inner.pyx":156 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): */ /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":159 + /* "gensim/models/fasttext_inner.pyx":157 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * count += ONEF */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":160 + /* "gensim/models/fasttext_inner.pyx":158 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) */ __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":161 + /* "gensim/models/fasttext_inner.pyx":159 * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":162 - * for d in range(1, subwords_idx_len[m]): + /* "gensim/models/fasttext_inner.pyx":160 + * for d in range(subwords_idx_len[m]): * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * @@ -2693,7 +2673,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":162 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< @@ -2703,7 +2683,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":165 + /* "gensim/models/fasttext_inner.pyx":163 * * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -2712,7 +2692,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":162 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< @@ -2721,7 +2701,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":166 + /* "gensim/models/fasttext_inner.pyx":164 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2731,7 +2711,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":167 + /* "gensim/models/fasttext_inner.pyx":165 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2740,7 +2720,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":166 + /* "gensim/models/fasttext_inner.pyx":164 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2749,7 +2729,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":169 + /* "gensim/models/fasttext_inner.pyx":167 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2758,7 +2738,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":171 + /* "gensim/models/fasttext_inner.pyx":169 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2769,7 +2749,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":172 + /* "gensim/models/fasttext_inner.pyx":170 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2779,7 +2759,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":173 + /* "gensim/models/fasttext_inner.pyx":171 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2788,7 +2768,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":174 + /* "gensim/models/fasttext_inner.pyx":172 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2797,7 +2777,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":172 + /* "gensim/models/fasttext_inner.pyx":170 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2807,7 +2787,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L12; } - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":174 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2817,7 +2797,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":177 + /* "gensim/models/fasttext_inner.pyx":175 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2826,7 +2806,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":176 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2836,7 +2816,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":179 + /* "gensim/models/fasttext_inner.pyx":177 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2845,7 +2825,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":176 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2854,7 +2834,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":178 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2865,7 +2845,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L12:; - /* "gensim/models/fasttext_inner.pyx":182 + /* "gensim/models/fasttext_inner.pyx":180 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2874,7 +2854,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":183 + /* "gensim/models/fasttext_inner.pyx":181 * * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2883,7 +2863,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":182 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2901,7 +2881,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":185 + /* "gensim/models/fasttext_inner.pyx":183 * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2910,7 +2890,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":182 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2919,7 +2899,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":186 + /* "gensim/models/fasttext_inner.pyx":184 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2928,7 +2908,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":185 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2937,7 +2917,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":189 + /* "gensim/models/fasttext_inner.pyx":187 * g = (label - f) * alpha * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2946,7 +2926,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":188 * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2957,7 +2937,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":190 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2967,7 +2947,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":193 + /* "gensim/models/fasttext_inner.pyx":191 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2976,7 +2956,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":192 + /* "gensim/models/fasttext_inner.pyx":190 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2985,7 +2965,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":195 + /* "gensim/models/fasttext_inner.pyx":193 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -2996,7 +2976,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":196 + /* "gensim/models/fasttext_inner.pyx":194 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3006,7 +2986,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":195 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -3015,7 +2995,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L18_continue; - /* "gensim/models/fasttext_inner.pyx":196 + /* "gensim/models/fasttext_inner.pyx":194 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3024,30 +3004,30 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":199 + /* "gensim/models/fasttext_inner.pyx":197 * continue * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) */ /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":200 + /* "gensim/models/fasttext_inner.pyx":198 * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * */ __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":201 + /* "gensim/models/fasttext_inner.pyx":199 * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< * * return next_random @@ -3058,7 +3038,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L18_continue:; } - /* "gensim/models/fasttext_inner.pyx":203 + /* "gensim/models/fasttext_inner.pyx":201 * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -3068,7 +3048,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":136 + /* "gensim/models/fasttext_inner.pyx":134 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -3081,7 +3061,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":206 +/* "gensim/models/fasttext_inner.pyx":204 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -3098,16 +3078,16 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f_dot; int __pyx_v_m; - long __pyx_v_d; + int __pyx_v_d; int __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - long __pyx_t_5; + int __pyx_t_5; PY_LONG_LONG __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":215 + /* "gensim/models/fasttext_inner.pyx":213 * cdef long long a, b * cdef long long row2, sgn * cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob # <<<<<<<<<<<<<< @@ -3116,7 +3096,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":218 + /* "gensim/models/fasttext_inner.pyx":216 * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -3125,7 +3105,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":217 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -3134,7 +3114,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":220 + /* "gensim/models/fasttext_inner.pyx":218 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -3145,7 +3125,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":221 + /* "gensim/models/fasttext_inner.pyx":219 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -3155,7 +3135,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":222 + /* "gensim/models/fasttext_inner.pyx":220 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -3164,7 +3144,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":221 + /* "gensim/models/fasttext_inner.pyx":219 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -3173,47 +3153,47 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":224 + /* "gensim/models/fasttext_inner.pyx":222 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): */ /*else*/ { __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":225 + /* "gensim/models/fasttext_inner.pyx":223 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * count += ONEF */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":226 + /* "gensim/models/fasttext_inner.pyx":224 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) */ __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":227 + /* "gensim/models/fasttext_inner.pyx":225 * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":228 - * for d in range(1, subwords_idx_len[m]): + /* "gensim/models/fasttext_inner.pyx":226 + * for d in range(subwords_idx_len[m]): * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * if count > (0.5): @@ -3225,7 +3205,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":229 + /* "gensim/models/fasttext_inner.pyx":227 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -3235,7 +3215,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":230 + /* "gensim/models/fasttext_inner.pyx":228 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -3244,7 +3224,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":229 + /* "gensim/models/fasttext_inner.pyx":227 * count += ONEF * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -3253,7 +3233,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":231 + /* "gensim/models/fasttext_inner.pyx":229 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -3263,7 +3243,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":232 + /* "gensim/models/fasttext_inner.pyx":230 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -3272,7 +3252,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":231 + /* "gensim/models/fasttext_inner.pyx":229 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -3281,7 +3261,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":232 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -3290,7 +3270,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":235 + /* "gensim/models/fasttext_inner.pyx":233 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -3301,7 +3281,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_1; __pyx_t_6+=1) { __pyx_v_b = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":234 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -3310,7 +3290,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":235 * for b in range(codelens[i]): * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3319,7 +3299,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":236 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3337,7 +3317,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L13_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":239 + /* "gensim/models/fasttext_inner.pyx":237 * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -3346,7 +3326,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":236 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3355,7 +3335,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":240 + /* "gensim/models/fasttext_inner.pyx":238 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -3364,7 +3344,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":241 + /* "gensim/models/fasttext_inner.pyx":239 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -3373,7 +3353,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":243 + /* "gensim/models/fasttext_inner.pyx":241 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -3382,7 +3362,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":244 + /* "gensim/models/fasttext_inner.pyx":242 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3393,7 +3373,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":246 + /* "gensim/models/fasttext_inner.pyx":244 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -3403,7 +3383,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":247 + /* "gensim/models/fasttext_inner.pyx":245 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -3412,7 +3392,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":246 + /* "gensim/models/fasttext_inner.pyx":244 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -3421,7 +3401,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":249 + /* "gensim/models/fasttext_inner.pyx":247 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -3432,7 +3412,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":248 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3442,7 +3422,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":251 + /* "gensim/models/fasttext_inner.pyx":249 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -3451,7 +3431,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L16_continue; - /* "gensim/models/fasttext_inner.pyx":250 + /* "gensim/models/fasttext_inner.pyx":248 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3460,30 +3440,30 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":253 + /* "gensim/models/fasttext_inner.pyx":251 * continue * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) */ /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":254 + /* "gensim/models/fasttext_inner.pyx":252 * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(1, subwords_idx_len[m]): # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * */ __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 1; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":255 + /* "gensim/models/fasttext_inner.pyx":253 * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(1, subwords_idx_len[m]): + * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< * * @@ -3494,7 +3474,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L16_continue:; } - /* "gensim/models/fasttext_inner.pyx":206 + /* "gensim/models/fasttext_inner.pyx":204 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -3505,7 +3485,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":258 +/* "gensim/models/fasttext_inner.pyx":256 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -3554,29 +3534,29 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 258, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 256, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 258, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 256, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 258, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 256, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 258, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 256, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 258, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 256, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3595,7 +3575,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 258, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 256, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3679,142 +3659,142 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON int __pyx_t_24; __Pyx_RefNannySetupContext("train_batch_sg", 0); - /* "gensim/models/fasttext_inner.pyx":259 + /* "gensim/models/fasttext_inner.pyx":257 * * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 259, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 259, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 257, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":260 + /* "gensim/models/fasttext_inner.pyx":258 * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 260, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 258, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 260, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 258, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":261 + /* "gensim/models/fasttext_inner.pyx":259 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 259, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 259, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 259, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":263 + /* "gensim/models/fasttext_inner.pyx":261 * cdef int sample = (model.sample != 0) * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 263, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 263, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 261, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 263, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 261, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":264 + /* "gensim/models/fasttext_inner.pyx":262 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 264, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 262, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 264, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 262, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":265 + /* "gensim/models/fasttext_inner.pyx":263 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 265, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 263, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 265, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 263, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 265, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 263, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":266 + /* "gensim/models/fasttext_inner.pyx":264 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 266, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 264, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 266, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 264, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":271 + /* "gensim/models/fasttext_inner.pyx":269 * cdef REAL_t *l1 * * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 269, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":272 + /* "gensim/models/fasttext_inner.pyx":270 * * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 272, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 272, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 270, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":278 + /* "gensim/models/fasttext_inner.pyx":276 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":281 + /* "gensim/models/fasttext_inner.pyx":279 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -3824,19 +3804,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":301 + /* "gensim/models/fasttext_inner.pyx":299 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 301, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":303 + /* "gensim/models/fasttext_inner.pyx":301 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3846,20 +3826,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":304 + /* "gensim/models/fasttext_inner.pyx":302 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 304, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 302, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":303 + /* "gensim/models/fasttext_inner.pyx":301 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3868,7 +3848,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":306 + /* "gensim/models/fasttext_inner.pyx":304 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3878,46 +3858,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":307 + /* "gensim/models/fasttext_inner.pyx":305 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 305, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 307, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 305, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":308 + /* "gensim/models/fasttext_inner.pyx":306 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 308, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 308, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 306, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":309 + /* "gensim/models/fasttext_inner.pyx":307 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":306 + /* "gensim/models/fasttext_inner.pyx":304 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3926,7 +3906,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":310 + /* "gensim/models/fasttext_inner.pyx":308 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3944,41 +3924,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":311 + /* "gensim/models/fasttext_inner.pyx":309 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":310 + /* "gensim/models/fasttext_inner.pyx":308 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3987,42 +3967,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":314 + /* "gensim/models/fasttext_inner.pyx":312 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * l1 = np.PyArray_DATA(_l1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 314, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 312, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":315 + /* "gensim/models/fasttext_inner.pyx":313 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 315, __pyx_L1_error) + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 313, __pyx_L1_error) __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); - /* "gensim/models/fasttext_inner.pyx":318 + /* "gensim/models/fasttext_inner.pyx":316 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":319 + /* "gensim/models/fasttext_inner.pyx":317 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -4031,7 +4011,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":320 + /* "gensim/models/fasttext_inner.pyx":318 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -4042,26 +4022,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 318, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 318, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 318, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 320, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -4071,7 +4051,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 320, __pyx_L1_error) + else __PYX_ERR(0, 318, __pyx_L1_error) } break; } @@ -4080,18 +4060,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":321 + /* "gensim/models/fasttext_inner.pyx":319 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":322 + /* "gensim/models/fasttext_inner.pyx":320 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -4100,7 +4080,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":321 + /* "gensim/models/fasttext_inner.pyx":319 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -4109,7 +4089,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":323 + /* "gensim/models/fasttext_inner.pyx":321 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -4120,26 +4100,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 321, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -4149,7 +4129,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 323, __pyx_L1_error) + else __PYX_ERR(0, 321, __pyx_L1_error) } break; } @@ -4158,16 +4138,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":324 + /* "gensim/models/fasttext_inner.pyx":322 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 324, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -4178,7 +4158,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":325 + /* "gensim/models/fasttext_inner.pyx":323 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4189,7 +4169,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":326 + /* "gensim/models/fasttext_inner.pyx":324 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -4198,7 +4178,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":325 + /* "gensim/models/fasttext_inner.pyx":323 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4207,7 +4187,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":327 + /* "gensim/models/fasttext_inner.pyx":325 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4220,20 +4200,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":328 + /* "gensim/models/fasttext_inner.pyx":326 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -4242,7 +4222,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":327 + /* "gensim/models/fasttext_inner.pyx":325 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4251,45 +4231,45 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":329 + /* "gensim/models/fasttext_inner.pyx":327 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":331 + /* "gensim/models/fasttext_inner.pyx":329 * indexes[effective_words] = word.index * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -4297,9 +4277,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; __pyx_t_19 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 329, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -4307,17 +4287,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_17))) { if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 329, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 329, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -4327,7 +4307,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 331, __pyx_L1_error) + else __PYX_ERR(0, 329, __pyx_L1_error) } break; } @@ -4335,58 +4315,58 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 331, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":332 + /* "gensim/models/fasttext_inner.pyx":330 * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 332, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; @@ -4394,39 +4374,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":333 + /* "gensim/models/fasttext_inner.pyx":331 * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 333, __pyx_L1_error) + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 331, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); - /* "gensim/models/fasttext_inner.pyx":334 + /* "gensim/models/fasttext_inner.pyx":332 * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 334, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 332, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":336 + /* "gensim/models/fasttext_inner.pyx":334 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 336, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 336, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":338 + /* "gensim/models/fasttext_inner.pyx":336 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4436,46 +4416,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":339 + /* "gensim/models/fasttext_inner.pyx":337 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 339, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 339, __pyx_L1_error) + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":340 + /* "gensim/models/fasttext_inner.pyx":338 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 340, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 338, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 340, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 338, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":341 + /* "gensim/models/fasttext_inner.pyx":339 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 341, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 341, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 339, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":338 + /* "gensim/models/fasttext_inner.pyx":336 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4484,7 +4464,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":343 + /* "gensim/models/fasttext_inner.pyx":341 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< @@ -4493,7 +4473,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":344 + /* "gensim/models/fasttext_inner.pyx":342 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4503,7 +4483,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":345 + /* "gensim/models/fasttext_inner.pyx":343 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< @@ -4512,7 +4492,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":344 + /* "gensim/models/fasttext_inner.pyx":342 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -4521,7 +4501,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":323 + /* "gensim/models/fasttext_inner.pyx":321 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -4533,7 +4513,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":350 + /* "gensim/models/fasttext_inner.pyx":348 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 355, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -4674,17 +4654,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_16))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 355, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 355, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 355, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 355, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -4694,7 +4674,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 357, __pyx_L1_error) + else __PYX_ERR(0, 355, __pyx_L1_error) } break; } @@ -4705,17 +4685,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":356 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 358, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 356, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":357 + /* "gensim/models/fasttext_inner.pyx":355 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -4725,7 +4705,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":358 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4740,7 +4720,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":359 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -4751,7 +4731,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":360 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -4760,7 +4740,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":361 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -4769,7 +4749,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":362 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -4780,7 +4760,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":365 + /* "gensim/models/fasttext_inner.pyx":363 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4789,7 +4769,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":366 + /* "gensim/models/fasttext_inner.pyx":364 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4799,7 +4779,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":367 + /* "gensim/models/fasttext_inner.pyx":365 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -4808,7 +4788,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":366 + /* "gensim/models/fasttext_inner.pyx":364 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4817,7 +4797,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":368 + /* "gensim/models/fasttext_inner.pyx":366 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4826,7 +4806,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":367 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4836,7 +4816,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":370 + /* "gensim/models/fasttext_inner.pyx":368 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -4845,7 +4825,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":367 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4854,7 +4834,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":371 + /* "gensim/models/fasttext_inner.pyx":369 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< @@ -4865,7 +4845,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { __pyx_v_j = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":372 + /* "gensim/models/fasttext_inner.pyx":370 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4875,7 +4855,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":373 + /* "gensim/models/fasttext_inner.pyx":371 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< @@ -4884,7 +4864,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L33_continue; - /* "gensim/models/fasttext_inner.pyx":372 + /* "gensim/models/fasttext_inner.pyx":370 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4893,7 +4873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":374 + /* "gensim/models/fasttext_inner.pyx":372 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4903,7 +4883,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":375 + /* "gensim/models/fasttext_inner.pyx":373 * continue * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< @@ -4912,7 +4892,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":374 + /* "gensim/models/fasttext_inner.pyx":372 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< @@ -4921,7 +4901,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":376 + /* "gensim/models/fasttext_inner.pyx":374 * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4931,7 +4911,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":377 + /* "gensim/models/fasttext_inner.pyx":375 * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< @@ -4940,7 +4920,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":376 + /* "gensim/models/fasttext_inner.pyx":374 * if hs: * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -4954,7 +4934,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":358 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4973,7 +4953,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":379 + /* "gensim/models/fasttext_inner.pyx":377 * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -4981,13 +4961,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 379, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __pyx_r = __pyx_t_16; __pyx_t_16 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":258 + /* "gensim/models/fasttext_inner.pyx":256 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -5021,7 +5001,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":382 +/* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -5070,29 +5050,29 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 382, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 380, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 382, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 380, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 382, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 380, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 382, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 380, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 382, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 380, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -5111,7 +5091,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 382, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 380, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5194,155 +5174,155 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT int __pyx_t_22; __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":383 + /* "gensim/models/fasttext_inner.pyx":381 * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":384 + /* "gensim/models/fasttext_inner.pyx":382 * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":385 + /* "gensim/models/fasttext_inner.pyx":383 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 385, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":386 + /* "gensim/models/fasttext_inner.pyx":384 * cdef int negative = model.negative * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":388 + /* "gensim/models/fasttext_inner.pyx":386 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":389 + /* "gensim/models/fasttext_inner.pyx":387 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":390 + /* "gensim/models/fasttext_inner.pyx":388 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 390, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 390, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":391 + /* "gensim/models/fasttext_inner.pyx":389 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 389, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 391, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":394 + /* "gensim/models/fasttext_inner.pyx":392 * * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 394, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 392, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":395 + /* "gensim/models/fasttext_inner.pyx":393 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 395, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 393, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 395, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 393, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":401 + /* "gensim/models/fasttext_inner.pyx":399 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 401, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 399, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 401, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 399, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":404 + /* "gensim/models/fasttext_inner.pyx":402 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -5352,19 +5332,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":422 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 424, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":424 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5374,20 +5354,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":425 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 427, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":426 + /* "gensim/models/fasttext_inner.pyx":424 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5396,7 +5376,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":427 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5406,46 +5386,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":428 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 430, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":429 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 431, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 431, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 429, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":430 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 430, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":427 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5454,7 +5434,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":433 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5472,41 +5452,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":434 + /* "gensim/models/fasttext_inner.pyx":432 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":433 + /* "gensim/models/fasttext_inner.pyx":431 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5515,42 +5495,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":437 + /* "gensim/models/fasttext_inner.pyx":435 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 437, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":438 + /* "gensim/models/fasttext_inner.pyx":436 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 438, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 436, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":439 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":440 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5559,7 +5539,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":441 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5570,26 +5550,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 441, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 443, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5599,7 +5579,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 443, __pyx_L1_error) + else __PYX_ERR(0, 441, __pyx_L1_error) } break; } @@ -5608,18 +5588,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":442 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":443 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -5628,7 +5608,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":442 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5637,7 +5617,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":444 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -5648,26 +5628,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 444, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 446, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5677,7 +5657,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 446, __pyx_L1_error) + else __PYX_ERR(0, 444, __pyx_L1_error) } break; } @@ -5686,16 +5666,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":445 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 447, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5706,7 +5686,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":446 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5717,7 +5697,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":449 + /* "gensim/models/fasttext_inner.pyx":447 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5726,7 +5706,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":446 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5735,7 +5715,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5748,20 +5728,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":451 + /* "gensim/models/fasttext_inner.pyx":449 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5770,7 +5750,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":448 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5779,45 +5759,45 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":450 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":454 + /* "gensim/models/fasttext_inner.pyx":452 * indexes[effective_words] = word.index * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) + * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5825,9 +5805,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; __pyx_t_19 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -5835,17 +5815,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_17))) { if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 452, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 452, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -5855,7 +5835,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 454, __pyx_L1_error) + else __PYX_ERR(0, 452, __pyx_L1_error) } break; } @@ -5863,98 +5843,88 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 454, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 452, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":455 + /* "gensim/models/fasttext_inner.pyx":453 * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< - * subwords_idx_len[effective_words] = (len(subwords) + 1) + * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< + * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 455, __pyx_L1_error) + __Pyx_INCREF(__pyx_v_subwords); + __Pyx_GIVEREF(__pyx_v_subwords); + PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); + __pyx_t_13 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __Pyx_GIVEREF(__pyx_t_14); - PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); - __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 455, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_GIVEREF(__pyx_t_14); - PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); - __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 455, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 455, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_13, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_14, __pyx_t_13); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":456 + /* "gensim/models/fasttext_inner.pyx":454 * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< + * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 456, __pyx_L1_error) - (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 454, __pyx_L1_error) + (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":457 - * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - * subwords_idx_len[effective_words] = (len(subwords) + 1) + /* "gensim/models/fasttext_inner.pyx":455 + * word_subwords = np.array(subwords, dtype=np.uint32) + * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 457, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 455, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":459 + /* "gensim/models/fasttext_inner.pyx":457 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 457, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 457, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":459 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -5964,46 +5934,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":460 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":461 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 461, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 463, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 461, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":462 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 464, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 464, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 462, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":459 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -6012,7 +5982,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":465 + /* "gensim/models/fasttext_inner.pyx":463 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< @@ -6021,7 +5991,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":466 + /* "gensim/models/fasttext_inner.pyx":464 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6031,7 +6001,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":467 + /* "gensim/models/fasttext_inner.pyx":465 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< @@ -6040,7 +6010,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":466 + /* "gensim/models/fasttext_inner.pyx":464 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< @@ -6049,7 +6019,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":444 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -6061,7 +6031,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":472 + /* "gensim/models/fasttext_inner.pyx":470 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 477, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -6202,17 +6172,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_16))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 477, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 477, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 477, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 479, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 477, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -6222,7 +6192,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 479, __pyx_L1_error) + else __PYX_ERR(0, 477, __pyx_L1_error) } break; } @@ -6233,17 +6203,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":480 + /* "gensim/models/fasttext_inner.pyx":478 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 480, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 478, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":479 + /* "gensim/models/fasttext_inner.pyx":477 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6253,7 +6223,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":481 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6268,7 +6238,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":482 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6279,7 +6249,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":483 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6288,7 +6258,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":484 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6297,7 +6267,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":485 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -6308,7 +6278,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":486 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6317,7 +6287,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":487 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6327,7 +6297,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":488 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6336,7 +6306,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":487 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6345,7 +6315,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":489 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6354,7 +6324,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":490 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6364,7 +6334,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":491 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6373,7 +6343,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":492 + /* "gensim/models/fasttext_inner.pyx":490 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6382,7 +6352,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":495 + /* "gensim/models/fasttext_inner.pyx":493 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6392,7 +6362,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":496 + /* "gensim/models/fasttext_inner.pyx":494 * * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, # <<<<<<<<<<<<<< @@ -6401,7 +6371,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":495 + /* "gensim/models/fasttext_inner.pyx":493 * k = idx_end * * if hs: # <<<<<<<<<<<<<< @@ -6410,7 +6380,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":498 + /* "gensim/models/fasttext_inner.pyx":496 * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6420,7 +6390,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":499 + /* "gensim/models/fasttext_inner.pyx":497 * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) * if negative: * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, # <<<<<<<<<<<<<< @@ -6429,7 +6399,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":498 + /* "gensim/models/fasttext_inner.pyx":496 * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< @@ -6441,7 +6411,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":481 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6460,7 +6430,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":502 + /* "gensim/models/fasttext_inner.pyx":500 * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< @@ -6468,13 +6438,13 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 502, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 500, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __pyx_r = __pyx_t_16; __pyx_t_16 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6508,7 +6478,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":505 +/* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< @@ -6547,7 +6517,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":514 + /* "gensim/models/fasttext_inner.pyx":512 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6557,7 +6527,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":515 + /* "gensim/models/fasttext_inner.pyx":513 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6567,7 +6537,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":516 + /* "gensim/models/fasttext_inner.pyx":514 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6576,7 +6546,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":517 + /* "gensim/models/fasttext_inner.pyx":515 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6585,7 +6555,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":522 + /* "gensim/models/fasttext_inner.pyx":520 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6595,7 +6565,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":523 + /* "gensim/models/fasttext_inner.pyx":521 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6604,7 +6574,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":524 + /* "gensim/models/fasttext_inner.pyx":522 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6613,7 +6583,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":525 + /* "gensim/models/fasttext_inner.pyx":523 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6623,7 +6593,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":526 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6632,7 +6602,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":527 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6641,7 +6611,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":528 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6651,7 +6621,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":529 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6660,7 +6630,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":530 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6669,7 +6639,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":531 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6681,7 +6651,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":528 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6690,7 +6660,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":532 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6700,7 +6670,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":533 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6709,7 +6679,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":536 + /* "gensim/models/fasttext_inner.pyx":534 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6718,7 +6688,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":537 + /* "gensim/models/fasttext_inner.pyx":535 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6730,7 +6700,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":532 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6739,7 +6709,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":541 + /* "gensim/models/fasttext_inner.pyx":539 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6749,7 +6719,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":542 + /* "gensim/models/fasttext_inner.pyx":540 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6758,7 +6728,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":543 + /* "gensim/models/fasttext_inner.pyx":541 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6771,7 +6741,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":505 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< @@ -9516,7 +9486,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 22, __pyx_L1_error) __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 67, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 357, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 355, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) return 0; @@ -9528,31 +9498,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/fasttext_inner.pyx":311 + /* "gensim/models/fasttext_inner.pyx":309 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":434 + /* "gensim/models/fasttext_inner.pyx":432 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 432, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9653,41 +9623,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - /* "gensim/models/fasttext_inner.pyx":258 + /* "gensim/models/fasttext_inner.pyx":256 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 258, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 256, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 258, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 258, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 256, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 256, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 382, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 380, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 380, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":505 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 505, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 505, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 505, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 503, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -10093,49 +10063,49 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/fasttext_inner.pyx":258 + /* "gensim/models/fasttext_inner.pyx":256 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 258, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 258, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 256, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":380 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 382, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":505 + /* "gensim/models/fasttext_inner.pyx":503 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 505, __pyx_L1_error) + __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 505, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":545 + /* "gensim/models/fasttext_inner.pyx":543 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 545, __pyx_L1_error) + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 543, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { @@ -10148,22 +10118,22 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) } } if (__pyx_t_3) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 545, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 543, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 545, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 543, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 545, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 543, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "gensim/models/fasttext_inner.pyx":546 + /* "gensim/models/fasttext_inner.pyx":544 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 546, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 544, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 6ae1cb7431..bc9d8af9c0 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -65,8 +65,7 @@ cdef unsigned long long fast_sentence_sg_neg( scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) for d in range(1, subwords_len): - subword_row = subwords_index[d] * size - our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) cdef REAL_t norm_factor = ONEF / subwords_len sscal(&size, &norm_factor, l1 , &ONE) @@ -112,8 +111,7 @@ cdef void fast_sentence_sg_hs( scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) for d in range(1, subwords_len): - subword_row = subwords_index[d] * size - our_saxpy(&size, &ONEF, &syn0_ngrams[subword_row], &ONE, l1, &ONE) + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) cdef REAL_t norm_factor = ONEF / subwords_len sscal(&size, &norm_factor, l1 , &ONE) @@ -157,7 +155,7 @@ cdef unsigned long long fast_sentence_cbow_neg( else: count += ONEF our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - for d in range(1, subwords_idx_len[m]): + for d in range(subwords_idx_len[m]): count += ONEF our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) @@ -197,7 +195,7 @@ cdef unsigned long long fast_sentence_cbow_neg( continue else: our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - for d in range(1, subwords_idx_len[m]): + for d in range(subwords_idx_len[m]): our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) return next_random @@ -223,7 +221,7 @@ cdef void fast_sentence_cbow_hs( else: count += ONEF our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - for d in range(1, subwords_idx_len[m]): + for d in range(subwords_idx_len[m]): count += ONEF our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) if count > (0.5): @@ -251,7 +249,7 @@ cdef void fast_sentence_cbow_hs( continue else: our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - for d in range(1, subwords_idx_len[m]): + for d in range(subwords_idx_len[m]): our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) @@ -452,8 +450,8 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): indexes[effective_words] = word.index subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] - word_subwords = np.array([word.index] + subwords, dtype=np.uint32) - subwords_idx_len[effective_words] = (len(subwords) + 1) + word_subwords = np.array(subwords, dtype=np.uint32) + subwords_idx_len[effective_words] = len(subwords) subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # ensures reference count of word_subwords doesn't reach 0 subword_arrays[effective_words] = word_subwords From 19a5da0a79a6678943ff6c9f6f7f8bd2371ef118 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 30 Nov 2017 18:10:54 +0530 Subject: [PATCH 10/22] corrects parameters order for word_locks* in cbow --- gensim/models/fasttext_inner.c | 4 ++-- gensim/models/fasttext_inner.pyx | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 87a174975b..2d343b4f26 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -2515,7 +2515,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, CYTHON_UNUSED int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab) { +static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_cum_table, unsigned PY_LONG_LONG __pyx_v_cum_table_len, CYTHON_UNUSED int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; @@ -3069,7 +3069,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, */ -static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab) { +static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0_ngrams, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const **__pyx_v_subwords_idx, int const *__pyx_v_subwords_idx_len, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_vocab, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks_ngrams) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index bc9d8af9c0..4dd1f3df27 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -136,7 +136,7 @@ cdef unsigned long long fast_sentence_cbow_neg( REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1neg, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t *subwords_idx[MAX_SENTENCE_LEN], const int subwords_idx_len[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *word_locks_ngrams, REAL_t *word_locks_vocab) nogil: + int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *word_locks_vocab, REAL_t *word_locks_ngrams) nogil: cdef long long a cdef long long row2 @@ -206,7 +206,7 @@ cdef void fast_sentence_cbow_hs( REAL_t *neu1, REAL_t *syn0_vocab, REAL_t *syn0_ngrams, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t *subwords_idx[MAX_SENTENCE_LEN], const int subwords_idx_len[MAX_SENTENCE_LEN],const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, REAL_t *word_locks_ngrams, REAL_t *word_locks_vocab) nogil: + int i, int j, int k, int cbow_mean, REAL_t *word_locks_vocab, REAL_t *word_locks_ngrams) nogil: cdef long long a, b cdef long long row2, sgn @@ -541,4 +541,4 @@ def init(): return 2 FAST_VERSION = init() # initialize the module -MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN \ No newline at end of file +MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN From 9dafbd552d8e3ed563510805cf8d3ff4b750871a Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 1 Dec 2017 13:58:11 +0530 Subject: [PATCH 11/22] fixes indentation, unused imports and logging warning for slow version --- gensim/models/fasttext.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index f089bc65de..c5674f1a06 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -9,16 +9,17 @@ from gensim.models.word2vec import Word2Vec, train_sg_pair, train_cbow_pair from gensim.models.wrappers.fasttext import FastTextKeyedVectors from gensim.models.wrappers.fasttext import FastText as Ft_Wrapper, compute_ngrams, ft_hash -from gensim import matutils logger = logging.getLogger(__name__) try: from gensim.models.fasttext_inner import train_batch_sg, train_batch_cbow from gensim.models.fasttext_inner import FAST_VERSION, MAX_WORDS_IN_BATCH + logger.debug('Fast version of Fasttext is being used') except ImportError: # failed... fall back to plain numpy (20-80x slower training than the above) + logger.warning('Slow version of Fasttext is being used') FAST_VERSION = -1 MAX_WORDS_IN_BATCH = 10000 @@ -83,7 +84,7 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): class FastText(Word2Vec): def __init__( self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, word_ngrams=1, loss='ns', sample=1e-3, seed=1, workers=3, min_alpha=0.0001, + max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): @@ -95,7 +96,8 @@ def __init__( if self.word_ngrams <= 1 and self.max_n == 0: self.bucket = 0 - super(FastText, self).__init__(sentences=sentences, size=size, alpha=alpha, window=window, min_count=min_count, + super(FastText, self).__init__( + sentences=sentences, size=size, alpha=alpha, window=window, min_count=min_count, max_vocab_size=max_vocab_size, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, sg=sg, hs=hs, negative=negative, cbow_mean=cbow_mean, hashfxn=hashfxn, iter=iter, null_word=null_word, trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words) @@ -108,13 +110,15 @@ def initialize_word_vectors(self): def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): if update: if not len(self.wv.vocab): - raise RuntimeError("You cannot do an online vocabulary-update of a model which has no prior vocabulary. " + raise RuntimeError( + "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " "First build the vocabulary of your model with a corpus " "before doing an online update.") self.old_vocab_len = len(self.wv.vocab) self.old_hash2index_len = len(self.wv.hash2index) - super(FastText, self).build_vocab(sentences, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, progress_per=progress_per, update=update) + super(FastText, self).build_vocab( + sentences, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, progress_per=progress_per, update=update) self.init_ngrams(update=update) def init_ngrams(self, update=False): @@ -171,10 +175,21 @@ def init_ngrams(self, update=False): rand_obj = np.random rand_obj.seed(self.seed) - new_vocab_rows = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, (len(self.wv.vocab) - self.old_vocab_len, self.vector_size)) - new_vocab_lockf_rows = ones((len(self.wv.vocab) - self.old_vocab_len, self.vector_size), dtype=REAL) - new_ngram_rows = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, (len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size)) - new_ngram_lockf_rows = ones((len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size), dtype=REAL) + new_vocab_rows = rand_obj.uniform( + -1.0 / self.vector_size, 1.0 / self.vector_size, + (len(self.wv.vocab) - self.old_vocab_len, self.vector_size)) + new_vocab_lockf_rows = ones( + (len(self.wv.vocab) - self.old_vocab_len, + self.vector_size), + dtype=REAL) + new_ngram_rows = rand_obj.uniform( + -1.0 / self.vector_size, 1.0 / self.vector_size, + (len(self.wv.hash2index) - self.old_hash2index_len, + self.vector_size)) + new_ngram_lockf_rows = ones( + (len(self.wv.hash2index) - self.old_hash2index_len, + self.vector_size), + dtype=REAL) self.wv.syn0_vocab = vstack([self.wv.syn0_vocab, new_vocab_rows]) self.syn0_vocab_lockf = vstack([self.syn0_vocab_lockf, new_vocab_lockf_rows]) @@ -234,4 +249,4 @@ def load_fasttext_format(cls, *args, **kwargs): def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) - super(FastText, self).save(*args, **kwargs) \ No newline at end of file + super(FastText, self).save(*args, **kwargs) From 796ec915d9bfd66f22e16bdda7ba7b87cadb6a7b Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 1 Dec 2017 14:41:31 +0530 Subject: [PATCH 12/22] splits long lines and removes redundant `import`/`else` --- gensim/models/fasttext_inner.c | 1906 +++++++++++++++--------------- gensim/models/fasttext_inner.pyx | 83 +- 2 files changed, 963 insertions(+), 1026 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 2d343b4f26..45b6875645 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -525,7 +525,6 @@ static CYTHON_INLINE float __PYX_NAN() { #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" #include -#include #include "voidptr.h" #ifdef _OPENMP #include @@ -1594,8 +1593,6 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, cha /* Module declarations from 'libc.math' */ -/* Module declarations from 'libc.stdlib' */ - /* Module declarations from 'gensim.models.word2vec_inner' */ static __pyx_t_6gensim_6models_14word2vec_inner_scopy_ptr *__pyx_vp_6gensim_6models_14word2vec_inner_scopy = 0; #define __pyx_v_6gensim_6models_14word2vec_inner_scopy (*__pyx_vp_6gensim_6models_14word2vec_inner_scopy) @@ -1677,7 +1674,6 @@ static const char __pyx_k_point[] = "point"; static const char __pyx_k_range[] = "range"; static const char __pyx_k_token[] = "token"; static const char __pyx_k_vocab[] = "vocab"; -static const char __pyx_k_gensim[] = "gensim"; static const char __pyx_k_import[] = "__import__"; static const char __pyx_k_neu1_2[] = "neu1"; static const char __pyx_k_ngrams[] = "ngrams"; @@ -1696,11 +1692,9 @@ static const char __pyx_k_syn1neg[] = "syn1neg"; static const char __pyx_k_vlookup[] = "vlookup"; static const char __pyx_k_codelens[] = "codelens"; static const char __pyx_k_expected[] = "expected"; -static const char __pyx_k_matutils[] = "matutils"; static const char __pyx_k_negative[] = "negative"; static const char __pyx_k_sent_idx[] = "sent_idx"; static const char __pyx_k_subwords[] = "subwords"; -static const char __pyx_k_word2vec[] = "word2vec"; static const char __pyx_k_cbow_mean[] = "cbow_mean"; static const char __pyx_k_cum_table[] = "cum_table"; static const char __pyx_k_enumerate[] = "enumerate"; @@ -1775,7 +1769,6 @@ static PyObject *__pyx_n_s_expected; static PyObject *__pyx_kp_s_fasttext_inner_pyx; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; -static PyObject *__pyx_n_s_gensim; static PyObject *__pyx_n_s_gensim_models_fasttext_inner; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; @@ -1793,7 +1786,6 @@ static PyObject *__pyx_n_s_l1; static PyObject *__pyx_n_s_l1_2; static PyObject *__pyx_n_s_layer1_size; static PyObject *__pyx_n_s_main; -static PyObject *__pyx_n_s_matutils; static PyObject *__pyx_n_s_model; static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; @@ -1843,7 +1835,6 @@ static PyObject *__pyx_n_s_vlookup; static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_word; -static PyObject *__pyx_n_s_word2vec; static PyObject *__pyx_n_s_word_locks_ngrams; static PyObject *__pyx_n_s_word_locks_vocab; static PyObject *__pyx_n_s_word_subwords; @@ -1882,7 +1873,7 @@ static PyObject *__pyx_codeobj__16; static PyObject *__pyx_codeobj__18; static PyObject *__pyx_codeobj__20; -/* "gensim/models/fasttext_inner.pyx":48 +/* "gensim/models/fasttext_inner.pyx":43 * * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -1909,7 +1900,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente int __pyx_t_4; int __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":56 + /* "gensim/models/fasttext_inner.pyx":51 * * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -1918,7 +1909,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":57 + /* "gensim/models/fasttext_inner.pyx":52 * cdef long long a * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1927,7 +1918,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":58 + /* "gensim/models/fasttext_inner.pyx":53 * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -1936,7 +1927,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":63 + /* "gensim/models/fasttext_inner.pyx":58 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1945,7 +1936,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":64 + /* "gensim/models/fasttext_inner.pyx":59 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1954,7 +1945,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":66 + /* "gensim/models/fasttext_inner.pyx":61 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1963,7 +1954,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":67 + /* "gensim/models/fasttext_inner.pyx":62 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -1974,7 +1965,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":68 + /* "gensim/models/fasttext_inner.pyx":63 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -1984,7 +1975,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":69 + /* "gensim/models/fasttext_inner.pyx":64 * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< @@ -1993,7 +1984,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":70 + /* "gensim/models/fasttext_inner.pyx":65 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -2002,7 +1993,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":72 + /* "gensim/models/fasttext_inner.pyx":67 * sscal(&size, &norm_factor, l1 , &ONE) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2013,7 +2004,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":73 + /* "gensim/models/fasttext_inner.pyx":68 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2023,7 +2014,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_d == 0) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":74 + /* "gensim/models/fasttext_inner.pyx":69 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2032,7 +2023,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":75 + /* "gensim/models/fasttext_inner.pyx":70 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2041,7 +2032,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":73 + /* "gensim/models/fasttext_inner.pyx":68 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2051,7 +2042,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente goto __pyx_L7; } - /* "gensim/models/fasttext_inner.pyx":77 + /* "gensim/models/fasttext_inner.pyx":72 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2061,7 +2052,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":78 + /* "gensim/models/fasttext_inner.pyx":73 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2070,7 +2061,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":79 + /* "gensim/models/fasttext_inner.pyx":74 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2080,7 +2071,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":80 + /* "gensim/models/fasttext_inner.pyx":75 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2089,7 +2080,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":79 + /* "gensim/models/fasttext_inner.pyx":74 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2098,7 +2089,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":81 + /* "gensim/models/fasttext_inner.pyx":76 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2109,7 +2100,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente } __pyx_L7:; - /* "gensim/models/fasttext_inner.pyx":83 + /* "gensim/models/fasttext_inner.pyx":78 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2118,7 +2109,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":84 + /* "gensim/models/fasttext_inner.pyx":79 * * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2127,7 +2118,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":85 + /* "gensim/models/fasttext_inner.pyx":80 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2145,7 +2136,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_bool_binop_done:; if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":86 + /* "gensim/models/fasttext_inner.pyx":81 * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2154,7 +2145,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":85 + /* "gensim/models/fasttext_inner.pyx":80 * row2 = target_index * size * f_dot = our_dot(&size, l1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2163,7 +2154,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":87 + /* "gensim/models/fasttext_inner.pyx":82 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2172,7 +2163,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":88 + /* "gensim/models/fasttext_inner.pyx":83 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2181,7 +2172,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":89 + /* "gensim/models/fasttext_inner.pyx":84 * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2190,7 +2181,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":90 + /* "gensim/models/fasttext_inner.pyx":85 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2201,7 +2192,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":91 + /* "gensim/models/fasttext_inner.pyx":86 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2210,7 +2201,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":92 + /* "gensim/models/fasttext_inner.pyx":87 * our_saxpy(&size, &g, l1, &ONE, &syn1neg[row2], &ONE) * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2221,7 +2212,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":93 + /* "gensim/models/fasttext_inner.pyx":88 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2231,7 +2222,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":94 + /* "gensim/models/fasttext_inner.pyx":89 * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) * return next_random # <<<<<<<<<<<<<< @@ -2241,7 +2232,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":48 + /* "gensim/models/fasttext_inner.pyx":43 * * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< @@ -2254,7 +2245,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":97 +/* "gensim/models/fasttext_inner.pyx":92 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2279,7 +2270,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t int __pyx_t_5; int __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":105 + /* "gensim/models/fasttext_inner.pyx":100 * * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] # <<<<<<<<<<<<<< @@ -2288,7 +2279,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_word2_index = (__pyx_v_subwords_index[0]); - /* "gensim/models/fasttext_inner.pyx":106 + /* "gensim/models/fasttext_inner.pyx":101 * cdef long long a, b * cdef np.uint32_t word2_index = subwords_index[0] * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< @@ -2297,7 +2288,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":109 + /* "gensim/models/fasttext_inner.pyx":104 * cdef REAL_t f, g, f_dot, lprob * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2306,7 +2297,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":110 + /* "gensim/models/fasttext_inner.pyx":105 * * memset(work, 0, size * cython.sizeof(REAL_t)) * memset(l1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2315,7 +2306,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ memset(__pyx_v_l1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":112 + /* "gensim/models/fasttext_inner.pyx":107 * memset(l1, 0, size * cython.sizeof(REAL_t)) * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2324,7 +2315,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_scopy((&__pyx_v_size), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":113 + /* "gensim/models/fasttext_inner.pyx":108 * * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2335,7 +2326,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":114 + /* "gensim/models/fasttext_inner.pyx":109 * scopy(&size, &syn0_vocab[row1], &ONE, l1, &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) # <<<<<<<<<<<<<< @@ -2345,7 +2336,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":115 + /* "gensim/models/fasttext_inner.pyx":110 * for d in range(1, subwords_len): * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len # <<<<<<<<<<<<<< @@ -2354,7 +2345,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_norm_factor = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_subwords_len); - /* "gensim/models/fasttext_inner.pyx":116 + /* "gensim/models/fasttext_inner.pyx":111 * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_index[d] * size], &ONE, l1, &ONE) * cdef REAL_t norm_factor = ONEF / subwords_len * sscal(&size, &norm_factor, l1 , &ONE) # <<<<<<<<<<<<<< @@ -2363,7 +2354,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_norm_factor), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":118 + /* "gensim/models/fasttext_inner.pyx":113 * sscal(&size, &norm_factor, l1 , &ONE) * * for b in range(codelen): # <<<<<<<<<<<<<< @@ -2374,7 +2365,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_b = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":119 + /* "gensim/models/fasttext_inner.pyx":114 * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2383,7 +2374,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":120 + /* "gensim/models/fasttext_inner.pyx":115 * for b in range(codelen): * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2392,7 +2383,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_l1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":121 + /* "gensim/models/fasttext_inner.pyx":116 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2410,7 +2401,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L8_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":122 + /* "gensim/models/fasttext_inner.pyx":117 * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2419,7 +2410,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ goto __pyx_L5_continue; - /* "gensim/models/fasttext_inner.pyx":121 + /* "gensim/models/fasttext_inner.pyx":116 * row2 = word_point[b] * size * f_dot = our_dot(&size, l1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2428,7 +2419,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ } - /* "gensim/models/fasttext_inner.pyx":123 + /* "gensim/models/fasttext_inner.pyx":118 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2437,7 +2428,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":124 + /* "gensim/models/fasttext_inner.pyx":119 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2446,7 +2437,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":126 + /* "gensim/models/fasttext_inner.pyx":121 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2455,7 +2446,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":127 + /* "gensim/models/fasttext_inner.pyx":122 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2466,7 +2457,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_L5_continue:; } - /* "gensim/models/fasttext_inner.pyx":129 + /* "gensim/models/fasttext_inner.pyx":124 * our_saxpy(&size, &g, l1, &ONE, &syn1[row2], &ONE) * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) # <<<<<<<<<<<<<< @@ -2475,7 +2466,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":130 + /* "gensim/models/fasttext_inner.pyx":125 * * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): # <<<<<<<<<<<<<< @@ -2486,7 +2477,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t for (__pyx_t_2 = 1; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":131 + /* "gensim/models/fasttext_inner.pyx":126 * our_saxpy(&size, &word_locks_vocab[word2_index], work, &ONE, &syn0_vocab[row1], &ONE) * for d in range(1, subwords_len): * our_saxpy(&size, &word_locks_ngrams[subwords_index[d]], work, &ONE, &syn0_ngrams[subwords_index[d]*size], &ONE) # <<<<<<<<<<<<<< @@ -2496,7 +2487,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[(__pyx_v_subwords_index[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[((__pyx_v_subwords_index[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } - /* "gensim/models/fasttext_inner.pyx":97 + /* "gensim/models/fasttext_inner.pyx":92 * * * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< @@ -2507,7 +2498,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs(__pyx_t /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":134 +/* "gensim/models/fasttext_inner.pyx":129 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2537,7 +2528,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente long __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":143 + /* "gensim/models/fasttext_inner.pyx":138 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2546,7 +2537,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/fasttext_inner.pyx":144 + /* "gensim/models/fasttext_inner.pyx":139 * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL * cdef REAL_t f, g, count, inv_count = 1.0, label, log_e_f_dot, f_dot # <<<<<<<<<<<<<< @@ -2555,7 +2546,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":148 + /* "gensim/models/fasttext_inner.pyx":143 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< @@ -2564,7 +2555,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "gensim/models/fasttext_inner.pyx":150 + /* "gensim/models/fasttext_inner.pyx":145 * word_index = indexes[i] * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2573,7 +2564,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":151 + /* "gensim/models/fasttext_inner.pyx":146 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2582,7 +2573,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":152 + /* "gensim/models/fasttext_inner.pyx":147 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2593,144 +2584,142 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":153 + /* "gensim/models/fasttext_inner.pyx":148 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * count += ONEF */ __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":154 + /* "gensim/models/fasttext_inner.pyx":149 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":153 + /* "gensim/models/fasttext_inner.pyx":148 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * count += ONEF */ } - /* "gensim/models/fasttext_inner.pyx":156 + /* "gensim/models/fasttext_inner.pyx":150 + * if m == i: * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): */ - /*else*/ { - __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":157 - * else: + /* "gensim/models/fasttext_inner.pyx":151 + * continue + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for d in range(subwords_idx_len[m]): - * count += ONEF */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":158 + /* "gensim/models/fasttext_inner.pyx":152 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) */ - __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_d = __pyx_t_5; + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":159 - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":153 + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * */ - __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":160 - * for d in range(subwords_idx_len[m]): - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":154 + * for d in range(subwords_idx_len[m]): + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * * if count > (0.5): */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - } + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":162 - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":156 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: */ __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":163 + /* "gensim/models/fasttext_inner.pyx":157 * * if count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< + * inv_count = ONEF / count # <<<<<<<<<<<<<< * if cbow_mean: - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":162 - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":156 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: */ } - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":158 * if count > (0.5): - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) * */ __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":165 - * inv_count = ONEF/count + /* "gensim/models/fasttext_inner.pyx":159 + * inv_count = ONEF / count * if cbow_mean: - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":164 + /* "gensim/models/fasttext_inner.pyx":158 * if count > (0.5): - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) * */ } - /* "gensim/models/fasttext_inner.pyx":167 - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + /* "gensim/models/fasttext_inner.pyx":161 + * sscal(&size, &inv_count, neu1, &ONE) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * @@ -2738,7 +2727,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":169 + /* "gensim/models/fasttext_inner.pyx":163 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2749,7 +2738,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":170 + /* "gensim/models/fasttext_inner.pyx":164 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2759,94 +2748,90 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":171 + /* "gensim/models/fasttext_inner.pyx":165 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< * label = ONEF - * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/fasttext_inner.pyx":172 + /* "gensim/models/fasttext_inner.pyx":166 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; - /* "gensim/models/fasttext_inner.pyx":170 + /* "gensim/models/fasttext_inner.pyx":164 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index * label = ONEF */ - goto __pyx_L12; } - /* "gensim/models/fasttext_inner.pyx":174 + /* "gensim/models/fasttext_inner.pyx":167 + * target_index = word_index * label = ONEF - * else: - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: */ - /*else*/ { - __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); + __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":175 - * else: - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue + /* "gensim/models/fasttext_inner.pyx":168 + * label = ONEF + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == word_index: + * continue */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/fasttext_inner.pyx":176 - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":169 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":177 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":170 + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + * continue # <<<<<<<<<<<<<< + * label = 0.0 * */ - goto __pyx_L10_continue; + goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":176 - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":169 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 */ - } + } - /* "gensim/models/fasttext_inner.pyx":178 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":171 + * if target_index == word_index: + * continue + * label = 0.0 # <<<<<<<<<<<<<< * * row2 = target_index * size */ - __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - __pyx_L12:; + __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":180 - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":173 + * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) @@ -2854,7 +2839,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":181 + /* "gensim/models/fasttext_inner.pyx":174 * * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2863,7 +2848,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":182 + /* "gensim/models/fasttext_inner.pyx":175 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2881,7 +2866,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":183 + /* "gensim/models/fasttext_inner.pyx":176 * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2890,7 +2875,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":182 + /* "gensim/models/fasttext_inner.pyx":175 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2899,7 +2884,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":177 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2908,7 +2893,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":185 + /* "gensim/models/fasttext_inner.pyx":178 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2917,7 +2902,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":180 * g = (label - f) * alpha * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2926,7 +2911,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":181 * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2937,36 +2922,36 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":183 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, work, &ONE) * */ __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":191 + /* "gensim/models/fasttext_inner.pyx":184 * * if not cbow_mean: # divide error over summed window vectors - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # <<<<<<<<<<<<<< * * for m in range(j,k): */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":190 + /* "gensim/models/fasttext_inner.pyx":183 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, work, &ONE) * */ } - /* "gensim/models/fasttext_inner.pyx":193 - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + /* "gensim/models/fasttext_inner.pyx":186 + * sscal(&size, &inv_count, work, &ONE) * * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i: @@ -2976,7 +2961,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":187 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -2986,7 +2971,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":195 + /* "gensim/models/fasttext_inner.pyx":188 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -2995,7 +2980,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L18_continue; - /* "gensim/models/fasttext_inner.pyx":194 + /* "gensim/models/fasttext_inner.pyx":187 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< @@ -3004,7 +2989,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":197 + /* "gensim/models/fasttext_inner.pyx":190 * continue * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< @@ -3014,7 +2999,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":198 + /* "gensim/models/fasttext_inner.pyx":191 * else: * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< @@ -3025,7 +3010,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":199 + /* "gensim/models/fasttext_inner.pyx":192 * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) * for d in range(subwords_idx_len[m]): * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< @@ -3038,7 +3023,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L18_continue:; } - /* "gensim/models/fasttext_inner.pyx":201 + /* "gensim/models/fasttext_inner.pyx":194 * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -3048,7 +3033,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":134 + /* "gensim/models/fasttext_inner.pyx":129 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -3061,7 +3046,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":204 +/* "gensim/models/fasttext_inner.pyx":197 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -3087,7 +3072,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx PY_LONG_LONG __pyx_t_6; int __pyx_t_7; - /* "gensim/models/fasttext_inner.pyx":213 + /* "gensim/models/fasttext_inner.pyx":206 * cdef long long a, b * cdef long long row2, sgn * cdef REAL_t f, g, count, inv_count = 1.0, f_dot, lprob # <<<<<<<<<<<<<< @@ -3096,7 +3081,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = 1.0; - /* "gensim/models/fasttext_inner.pyx":216 + /* "gensim/models/fasttext_inner.pyx":209 * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -3105,7 +3090,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":217 + /* "gensim/models/fasttext_inner.pyx":210 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -3114,7 +3099,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/fasttext_inner.pyx":218 + /* "gensim/models/fasttext_inner.pyx":211 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -3125,144 +3110,142 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":212 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * count += ONEF */ __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":220 + /* "gensim/models/fasttext_inner.pyx":213 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) */ goto __pyx_L3_continue; - /* "gensim/models/fasttext_inner.pyx":219 + /* "gensim/models/fasttext_inner.pyx":212 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * count += ONEF */ } - /* "gensim/models/fasttext_inner.pyx":222 + /* "gensim/models/fasttext_inner.pyx":214 + * if m == i: * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): */ - /*else*/ { - __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":223 - * else: + /* "gensim/models/fasttext_inner.pyx":215 + * continue + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for d in range(subwords_idx_len[m]): - * count += ONEF */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":224 + /* "gensim/models/fasttext_inner.pyx":216 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) */ - __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_d = __pyx_t_5; + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":225 - * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - * for d in range(subwords_idx_len[m]): - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":217 + * our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + * for d in range(subwords_idx_len[m]): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): */ - __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); + __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14fasttext_inner_ONEF); - /* "gensim/models/fasttext_inner.pyx":226 - * for d in range(subwords_idx_len[m]): - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":218 + * for d in range(subwords_idx_len[m]): + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * if count > (0.5): - * inv_count = ONEF/count + * inv_count = ONEF / count */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - } + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_14fasttext_inner_ONEF), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } __pyx_L3_continue:; } - /* "gensim/models/fasttext_inner.pyx":227 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":219 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: */ __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":228 - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":220 + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< + * inv_count = ONEF / count # <<<<<<<<<<<<<< * if cbow_mean: - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14fasttext_inner_ONEF / __pyx_v_count); - /* "gensim/models/fasttext_inner.pyx":227 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + /* "gensim/models/fasttext_inner.pyx":219 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: */ } - /* "gensim/models/fasttext_inner.pyx":229 + /* "gensim/models/fasttext_inner.pyx":221 * if count > (0.5): - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) * */ __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":230 - * inv_count = ONEF/count + /* "gensim/models/fasttext_inner.pyx":222 + * inv_count = ONEF / count * if cbow_mean: - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":229 + /* "gensim/models/fasttext_inner.pyx":221 * if count > (0.5): - * inv_count = ONEF/count + * inv_count = ONEF / count * if cbow_mean: # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, neu1, &ONE) * */ } - /* "gensim/models/fasttext_inner.pyx":232 - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + /* "gensim/models/fasttext_inner.pyx":224 + * sscal(&size, &inv_count, neu1, &ONE) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * for b in range(codelens[i]): @@ -3270,7 +3253,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/fasttext_inner.pyx":233 + /* "gensim/models/fasttext_inner.pyx":225 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -3281,7 +3264,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_1; __pyx_t_6+=1) { __pyx_v_b = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":234 + /* "gensim/models/fasttext_inner.pyx":226 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -3290,7 +3273,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":235 + /* "gensim/models/fasttext_inner.pyx":227 * for b in range(codelens[i]): * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3299,7 +3282,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":228 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3317,7 +3300,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L13_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":237 + /* "gensim/models/fasttext_inner.pyx":229 * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -3326,7 +3309,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":236 + /* "gensim/models/fasttext_inner.pyx":228 * row2 = word_point[b] * size * f_dot = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3335,7 +3318,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ } - /* "gensim/models/fasttext_inner.pyx":238 + /* "gensim/models/fasttext_inner.pyx":230 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -3344,7 +3327,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":239 + /* "gensim/models/fasttext_inner.pyx":231 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -3353,7 +3336,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":241 + /* "gensim/models/fasttext_inner.pyx":233 * g = (1 - word_code[b] - f) * alpha * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -3362,7 +3345,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":242 + /* "gensim/models/fasttext_inner.pyx":234 * * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -3373,36 +3356,36 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":244 + /* "gensim/models/fasttext_inner.pyx":236 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, work, &ONE) * */ __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":245 + /* "gensim/models/fasttext_inner.pyx":237 * * if not cbow_mean: # divide error over summed window vectors - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # <<<<<<<<<<<<<< * * for m in range(j,k): */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":244 + /* "gensim/models/fasttext_inner.pyx":236 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, work, &ONE) * */ } - /* "gensim/models/fasttext_inner.pyx":247 - * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + /* "gensim/models/fasttext_inner.pyx":239 + * sscal(&size, &inv_count, work, &ONE) * * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i: @@ -3412,69 +3395,67 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":248 + /* "gensim/models/fasttext_inner.pyx":240 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) */ __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":249 + /* "gensim/models/fasttext_inner.pyx":241 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): */ goto __pyx_L16_continue; - /* "gensim/models/fasttext_inner.pyx":248 + /* "gensim/models/fasttext_inner.pyx":240 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) */ } - /* "gensim/models/fasttext_inner.pyx":251 + /* "gensim/models/fasttext_inner.pyx":242 + * if m == i: * continue - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * for d in range(subwords_idx_len[m]): - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) */ - /*else*/ { - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":252 - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + /* "gensim/models/fasttext_inner.pyx":243 + * continue + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * */ - __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_d = __pyx_t_5; + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":253 - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(subwords_idx_len[m]): - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":244 + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - } + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } __pyx_L16_continue:; } - /* "gensim/models/fasttext_inner.pyx":204 + /* "gensim/models/fasttext_inner.pyx":197 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -3485,7 +3466,7 @@ static void __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs(__pyx /* function exit code */ } -/* "gensim/models/fasttext_inner.pyx":256 +/* "gensim/models/fasttext_inner.pyx":247 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -3534,29 +3515,29 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 256, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 1); __PYX_ERR(0, 247, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 256, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 2); __PYX_ERR(0, 247, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 256, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 3); __PYX_ERR(0, 247, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_l1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 256, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, 4); __PYX_ERR(0, 247, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 256, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_sg") < 0)) __PYX_ERR(0, 247, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3575,7 +3556,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_1train_batch_sg(PyObj } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 256, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_sg", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 247, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3659,142 +3640,142 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON int __pyx_t_24; __Pyx_RefNannySetupContext("train_batch_sg", 0); - /* "gensim/models/fasttext_inner.pyx":257 + /* "gensim/models/fasttext_inner.pyx":248 * * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 248, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 257, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 248, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":258 + /* "gensim/models/fasttext_inner.pyx":249 * def train_batch_sg(model, sentences, alpha, _work, _l1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 258, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 258, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 249, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":259 + /* "gensim/models/fasttext_inner.pyx":250 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 259, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 259, __pyx_L1_error) + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 259, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 250, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":261 + /* "gensim/models/fasttext_inner.pyx":252 * cdef int sample = (model.sample != 0) * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 261, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 261, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 252, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":262 + /* "gensim/models/fasttext_inner.pyx":253 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 262, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 262, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 253, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":263 + /* "gensim/models/fasttext_inner.pyx":254 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 263, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 254, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 263, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 254, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 263, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 254, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":264 + /* "gensim/models/fasttext_inner.pyx":255 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 264, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 255, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 264, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 255, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":269 + /* "gensim/models/fasttext_inner.pyx":260 * cdef REAL_t *l1 * * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 269, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 260, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":270 + /* "gensim/models/fasttext_inner.pyx":261 * * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 270, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 261, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 270, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 261, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":276 + /* "gensim/models/fasttext_inner.pyx":267 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 276, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 267, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 276, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 267, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":279 + /* "gensim/models/fasttext_inner.pyx":270 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -3804,19 +3785,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":299 + /* "gensim/models/fasttext_inner.pyx":290 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 299, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":301 + /* "gensim/models/fasttext_inner.pyx":292 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3826,20 +3807,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":302 + /* "gensim/models/fasttext_inner.pyx":293 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 302, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 302, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 293, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":301 + /* "gensim/models/fasttext_inner.pyx":292 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -3848,7 +3829,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":304 + /* "gensim/models/fasttext_inner.pyx":295 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3858,46 +3839,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":305 + /* "gensim/models/fasttext_inner.pyx":296 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 305, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 296, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":306 + /* "gensim/models/fasttext_inner.pyx":297 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 306, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 306, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 297, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":307 + /* "gensim/models/fasttext_inner.pyx":298 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":304 + /* "gensim/models/fasttext_inner.pyx":295 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3906,7 +3887,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":308 + /* "gensim/models/fasttext_inner.pyx":299 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3924,41 +3905,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":309 + /* "gensim/models/fasttext_inner.pyx":300 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":308 + /* "gensim/models/fasttext_inner.pyx":299 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -3967,42 +3948,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":312 + /* "gensim/models/fasttext_inner.pyx":303 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * l1 = np.PyArray_DATA(_l1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 312, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 303, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":313 + /* "gensim/models/fasttext_inner.pyx":304 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * l1 = np.PyArray_DATA(_l1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 313, __pyx_L1_error) + if (!(likely(((__pyx_v__l1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__l1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 304, __pyx_L1_error) __pyx_v_l1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__l1))); - /* "gensim/models/fasttext_inner.pyx":316 + /* "gensim/models/fasttext_inner.pyx":307 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":317 + /* "gensim/models/fasttext_inner.pyx":308 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -4011,7 +3992,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":318 + /* "gensim/models/fasttext_inner.pyx":309 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -4022,26 +4003,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 309, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 318, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -4051,7 +4032,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 318, __pyx_L1_error) + else __PYX_ERR(0, 309, __pyx_L1_error) } break; } @@ -4060,18 +4041,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":319 + /* "gensim/models/fasttext_inner.pyx":310 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 310, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":320 + /* "gensim/models/fasttext_inner.pyx":311 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -4080,7 +4061,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":319 + /* "gensim/models/fasttext_inner.pyx":310 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -4089,7 +4070,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":321 + /* "gensim/models/fasttext_inner.pyx":312 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -4100,26 +4081,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 312, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -4129,7 +4110,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 321, __pyx_L1_error) + else __PYX_ERR(0, 312, __pyx_L1_error) } break; } @@ -4138,16 +4119,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":322 + /* "gensim/models/fasttext_inner.pyx":313 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 313, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -4158,7 +4139,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":323 + /* "gensim/models/fasttext_inner.pyx":314 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4169,7 +4150,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":324 + /* "gensim/models/fasttext_inner.pyx":315 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -4178,7 +4159,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":323 + /* "gensim/models/fasttext_inner.pyx":314 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -4187,7 +4168,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":325 + /* "gensim/models/fasttext_inner.pyx":316 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4200,20 +4181,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 325, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 325, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 325, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 325, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":326 + /* "gensim/models/fasttext_inner.pyx":317 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -4222,7 +4203,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":325 + /* "gensim/models/fasttext_inner.pyx":316 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -4231,45 +4212,45 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":327 + /* "gensim/models/fasttext_inner.pyx":318 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 327, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":329 + /* "gensim/models/fasttext_inner.pyx":320 * indexes[effective_words] = word.index * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -4277,9 +4258,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; __pyx_t_19 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 320, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -4287,17 +4268,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_17))) { if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -4307,7 +4288,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 329, __pyx_L1_error) + else __PYX_ERR(0, 320, __pyx_L1_error) } break; } @@ -4315,58 +4296,58 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 329, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":330 + /* "gensim/models/fasttext_inner.pyx":321 * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_13 = PyList_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyList_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_subwords); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 330, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_14, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, __pyx_t_14); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; @@ -4374,39 +4355,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":331 + /* "gensim/models/fasttext_inner.pyx":322 * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 322, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)(__pyx_t_18 + 1)); - /* "gensim/models/fasttext_inner.pyx":332 + /* "gensim/models/fasttext_inner.pyx":323 * word_subwords = np.array([word.index] + subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = (len(subwords) + 1) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 332, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 323, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":334 + /* "gensim/models/fasttext_inner.pyx":325 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 334, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 334, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":336 + /* "gensim/models/fasttext_inner.pyx":327 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4416,46 +4397,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":337 + /* "gensim/models/fasttext_inner.pyx":328 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 337, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 337, __pyx_L1_error) + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":338 + /* "gensim/models/fasttext_inner.pyx":329 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 338, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 329, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 338, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 329, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":339 + /* "gensim/models/fasttext_inner.pyx":330 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 339, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 330, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 339, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 330, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":336 + /* "gensim/models/fasttext_inner.pyx":327 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -4464,44 +4445,44 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":341 + /* "gensim/models/fasttext_inner.pyx":332 * points[effective_words] = np.PyArray_DATA(word.point) * * effective_words += 1 # <<<<<<<<<<<<<< * if effective_words == MAX_SENTENCE_LEN: - * break # TODO: log warning, tally overflow? + * break */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":342 + /* "gensim/models/fasttext_inner.pyx":333 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? + * break * */ __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":343 + /* "gensim/models/fasttext_inner.pyx":334 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: - * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * break # <<<<<<<<<<<<<< * * # keep track of which words go into which sentence, so we don't train */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":342 + /* "gensim/models/fasttext_inner.pyx":333 * * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? + * break * */ } - /* "gensim/models/fasttext_inner.pyx":321 + /* "gensim/models/fasttext_inner.pyx":312 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -4513,7 +4494,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":348 + /* "gensim/models/fasttext_inner.pyx":339 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 346, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -4654,17 +4635,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON if (likely(PyList_CheckExact(__pyx_t_16))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 346, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 346, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -4674,7 +4655,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 355, __pyx_L1_error) + else __PYX_ERR(0, 346, __pyx_L1_error) } break; } @@ -4685,17 +4666,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":356 + /* "gensim/models/fasttext_inner.pyx":347 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * with nogil: */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 356, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 347, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":355 + /* "gensim/models/fasttext_inner.pyx":346 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -4705,7 +4686,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":349 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4720,7 +4701,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":359 + /* "gensim/models/fasttext_inner.pyx":350 * * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -4731,7 +4712,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":360 + /* "gensim/models/fasttext_inner.pyx":351 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -4740,7 +4721,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":361 + /* "gensim/models/fasttext_inner.pyx":352 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -4749,7 +4730,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":362 + /* "gensim/models/fasttext_inner.pyx":353 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -4760,7 +4741,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":363 + /* "gensim/models/fasttext_inner.pyx":354 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4769,7 +4750,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":355 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4779,7 +4760,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":365 + /* "gensim/models/fasttext_inner.pyx":356 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -4788,7 +4769,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":364 + /* "gensim/models/fasttext_inner.pyx":355 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -4797,7 +4778,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":366 + /* "gensim/models/fasttext_inner.pyx":357 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4806,7 +4787,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":367 + /* "gensim/models/fasttext_inner.pyx":358 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4816,7 +4797,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":368 + /* "gensim/models/fasttext_inner.pyx":359 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -4825,7 +4806,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":367 + /* "gensim/models/fasttext_inner.pyx":358 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -4834,7 +4815,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":369 + /* "gensim/models/fasttext_inner.pyx":360 * if k > idx_end: * k = idx_end * for j in range(j, k): # <<<<<<<<<<<<<< @@ -4845,7 +4826,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON for (__pyx_t_24 = __pyx_v_j; __pyx_t_24 < __pyx_t_23; __pyx_t_24+=1) { __pyx_v_j = __pyx_t_24; - /* "gensim/models/fasttext_inner.pyx":370 + /* "gensim/models/fasttext_inner.pyx":361 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4855,16 +4836,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":371 + /* "gensim/models/fasttext_inner.pyx":362 * for j in range(j, k): * if j == i: * continue # <<<<<<<<<<<<<< * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + * fast_sentence_sg_hs( */ goto __pyx_L33_continue; - /* "gensim/models/fasttext_inner.pyx":370 + /* "gensim/models/fasttext_inner.pyx":361 * k = idx_end * for j in range(j, k): * if j == i: # <<<<<<<<<<<<<< @@ -4873,59 +4854,59 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON */ } - /* "gensim/models/fasttext_inner.pyx":372 + /* "gensim/models/fasttext_inner.pyx":363 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: + * fast_sentence_sg_hs( + * points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, */ __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":373 + /* "gensim/models/fasttext_inner.pyx":364 * continue * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< - * if negative: - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + * fast_sentence_sg_hs( # <<<<<<<<<<<<<< + * points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, + * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_j]), (__pyx_v_codes[__pyx_v_j]), (__pyx_v_codelens[__pyx_v_j]), __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":372 + /* "gensim/models/fasttext_inner.pyx":363 * if j == i: * continue * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) - * if negative: + * fast_sentence_sg_hs( + * points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, */ } - /* "gensim/models/fasttext_inner.pyx":374 - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":368 + * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, + * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) - * + * next_random = fast_sentence_sg_neg( + * negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, */ __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":375 - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":369 + * word_locks_ngrams) * if negative: - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) # <<<<<<<<<<<<<< - * - * return effective_words + * next_random = fast_sentence_sg_neg( # <<<<<<<<<<<<<< + * negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, + * indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), (__pyx_v_subwords_idx[__pyx_v_i]), (__pyx_v_subwords_idx_len[__pyx_v_i]), __pyx_v__alpha, __pyx_v_work, __pyx_v_l1, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":374 - * if hs: - * fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":368 + * subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, + * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) - * + * next_random = fast_sentence_sg_neg( + * negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, */ } __pyx_L33_continue:; @@ -4934,7 +4915,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":358 + /* "gensim/models/fasttext_inner.pyx":349 * reduced_windows[i] = item * * with nogil: # <<<<<<<<<<<<<< @@ -4953,21 +4934,21 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON } } - /* "gensim/models/fasttext_inner.pyx":377 - * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":374 + * next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 377, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 374, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __pyx_r = __pyx_t_16; __pyx_t_16 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":256 + /* "gensim/models/fasttext_inner.pyx":247 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< @@ -5001,7 +4982,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_train_batch_sg(CYTHON return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":380 +/* "gensim/models/fasttext_inner.pyx":377 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -5050,29 +5031,29 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentences)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 1); __PYX_ERR(0, 377, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 2); __PYX_ERR(0, 377, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 3); __PYX_ERR(0, 377, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, 4); __PYX_ERR(0, 377, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 380, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_batch_cbow") < 0)) __PYX_ERR(0, 377, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -5091,7 +5072,7 @@ static PyObject *__pyx_pw_6gensim_6models_14fasttext_inner_3train_batch_cbow(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 380, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("train_batch_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 377, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.fasttext_inner.train_batch_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5174,155 +5155,155 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT int __pyx_t_22; __Pyx_RefNannySetupContext("train_batch_cbow", 0); - /* "gensim/models/fasttext_inner.pyx":381 + /* "gensim/models/fasttext_inner.pyx":378 * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 378, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":382 + /* "gensim/models/fasttext_inner.pyx":379 * def train_batch_cbow(model, sentences, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 382, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":383 + /* "gensim/models/fasttext_inner.pyx":380 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 383, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_sample = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":384 + /* "gensim/models/fasttext_inner.pyx":381 * cdef int negative = model.negative * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":386 + /* "gensim/models/fasttext_inner.pyx":383 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_syn0_vocab); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 383, __pyx_L1_error) __pyx_v_syn0_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":387 + /* "gensim/models/fasttext_inner.pyx":384 * * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 387, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_vocab_lockf); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 384, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 387, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 384, __pyx_L1_error) __pyx_v_word_locks_vocab = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":388 + /* "gensim/models/fasttext_inner.pyx":385 * cdef REAL_t *syn0_vocab = (np.PyArray_DATA(model.wv.syn0_vocab)) * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 388, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_syn0_ngrams); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 385, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 388, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 385, __pyx_L1_error) __pyx_v_syn0_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":389 + /* "gensim/models/fasttext_inner.pyx":386 * cdef REAL_t *word_locks_vocab = (np.PyArray_DATA(model.syn0_vocab_lockf)) * cdef REAL_t *syn0_ngrams = (np.PyArray_DATA(model.wv.syn0_ngrams)) * cdef REAL_t *word_locks_ngrams = (np.PyArray_DATA(model.syn0_ngrams_lockf)) # <<<<<<<<<<<<<< * * cdef REAL_t *work */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_ngrams_lockf); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 389, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 386, __pyx_L1_error) __pyx_v_word_locks_ngrams = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":392 + /* "gensim/models/fasttext_inner.pyx":389 * * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 392, __pyx_L1_error) + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == ((npy_float32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 389, __pyx_L1_error) __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/fasttext_inner.pyx":393 + /* "gensim/models/fasttext_inner.pyx":390 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 393, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 390, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 393, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 390, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":399 + /* "gensim/models/fasttext_inner.pyx":396 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_idx[MAX_SENTENCE_LEN + 1] * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 399, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 396, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 399, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 396, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":402 + /* "gensim/models/fasttext_inner.pyx":399 * * cdef int i, j, k * cdef int effective_words = 0, effective_sentences = 0 # <<<<<<<<<<<<<< @@ -5332,19 +5313,19 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_effective_words = 0; __pyx_v_effective_sentences = 0; - /* "gensim/models/fasttext_inner.pyx":422 + /* "gensim/models/fasttext_inner.pyx":419 * # dummy dictionary to ensure that the memory locations that subwords_idx point to * # are referenced throughout so that it isn't put back to free memory pool by Python's memory manager * subword_arrays = {} # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 419, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_subword_arrays = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":421 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5354,20 +5335,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":425 + /* "gensim/models/fasttext_inner.pyx":422 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 422, __pyx_L1_error) __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":424 + /* "gensim/models/fasttext_inner.pyx":421 * subword_arrays = {} * * if hs: # <<<<<<<<<<<<<< @@ -5376,7 +5357,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":424 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5386,46 +5367,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":428 + /* "gensim/models/fasttext_inner.pyx":425 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 428, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 425, __pyx_L1_error) __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":429 + /* "gensim/models/fasttext_inner.pyx":426 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) * if negative or sample: */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 426, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 429, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 426, __pyx_L1_error) __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":430 + /* "gensim/models/fasttext_inner.pyx":427 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == ((Py_ssize_t)-1))) __PYX_ERR(0, 427, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_6; - /* "gensim/models/fasttext_inner.pyx":427 + /* "gensim/models/fasttext_inner.pyx":424 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5434,7 +5415,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":428 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5452,41 +5433,41 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L6_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":429 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - /* "gensim/models/fasttext_inner.pyx":431 + /* "gensim/models/fasttext_inner.pyx":428 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) * if negative or sample: # <<<<<<<<<<<<<< @@ -5495,42 +5476,42 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":435 + /* "gensim/models/fasttext_inner.pyx":432 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 435, __pyx_L1_error) + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 432, __pyx_L1_error) __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/fasttext_inner.pyx":436 + /* "gensim/models/fasttext_inner.pyx":433 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * * # prepare C structures so we can go "full C" and release the Python GIL */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 436, __pyx_L1_error) + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 433, __pyx_L1_error) __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/fasttext_inner.pyx":439 + /* "gensim/models/fasttext_inner.pyx":436 * * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab # <<<<<<<<<<<<<< * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 436, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 439, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 436, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_vlookup = __pyx_t_3; __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":440 + /* "gensim/models/fasttext_inner.pyx":437 * # prepare C structures so we can go "full C" and release the Python GIL * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 # <<<<<<<<<<<<<< @@ -5539,7 +5520,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ (__pyx_v_sentence_idx[0]) = 0; - /* "gensim/models/fasttext_inner.pyx":441 + /* "gensim/models/fasttext_inner.pyx":438 * vlookup = model.wv.vocab * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: # <<<<<<<<<<<<<< @@ -5550,26 +5531,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_3 = __pyx_v_sentences; __Pyx_INCREF(__pyx_t_3); __pyx_t_6 = 0; __pyx_t_10 = NULL; } else { - __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_6 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_sentences); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 438, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 438, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) + __pyx_t_8 = PySequence_ITEM(__pyx_t_3, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 438, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); #endif } @@ -5579,7 +5560,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 441, __pyx_L1_error) + else __PYX_ERR(0, 438, __pyx_L1_error) } break; } @@ -5588,18 +5569,18 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_sent, __pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":439 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_v_sent); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) __pyx_t_7 = ((!__pyx_t_5) != 0); if (__pyx_t_7) { - /* "gensim/models/fasttext_inner.pyx":443 + /* "gensim/models/fasttext_inner.pyx":440 * for sent in sentences: * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged # <<<<<<<<<<<<<< @@ -5608,7 +5589,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L8_continue; - /* "gensim/models/fasttext_inner.pyx":442 + /* "gensim/models/fasttext_inner.pyx":439 * sentence_idx[0] = 0 # indices of the first sentence always start at 0 * for sent in sentences: * if not sent: # <<<<<<<<<<<<<< @@ -5617,7 +5598,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":441 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -5628,26 +5609,26 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_8 = __pyx_v_sent; __Pyx_INCREF(__pyx_t_8); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_11 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sent); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_12 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 441, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_12)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_11); __Pyx_INCREF(__pyx_t_1); __pyx_t_11++; if (unlikely(0 < 0)) __PYX_ERR(0, 441, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 444, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_8, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 441, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -5657,7 +5638,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 444, __pyx_L1_error) + else __PYX_ERR(0, 441, __pyx_L1_error) } break; } @@ -5666,16 +5647,16 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":445 + /* "gensim/models/fasttext_inner.pyx":442 * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window */ - __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_7 = (__Pyx_PySequence_ContainsTF(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 442, __pyx_L1_error) if ((__pyx_t_7 != 0)) { - __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 442, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __pyx_t_1 = __pyx_t_13; __pyx_t_13 = 0; @@ -5686,7 +5667,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":443 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5697,7 +5678,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_t_7 != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":447 + /* "gensim/models/fasttext_inner.pyx":444 * word = vlookup[token] if token in vlookup else None * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window # <<<<<<<<<<<<<< @@ -5706,7 +5687,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":446 + /* "gensim/models/fasttext_inner.pyx":443 * for token in sent: * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< @@ -5715,7 +5696,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":445 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5728,20 +5709,20 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = __pyx_t_7; goto __pyx_L15_bool_binop_done; } - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_14 = PyObject_RichCompare(__pyx_t_1, __pyx_t_13, Py_LT); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 448, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_5 = __pyx_t_7; __pyx_L15_bool_binop_done:; if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":449 + /* "gensim/models/fasttext_inner.pyx":446 * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): * continue # <<<<<<<<<<<<<< @@ -5750,7 +5731,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ goto __pyx_L11_continue; - /* "gensim/models/fasttext_inner.pyx":448 + /* "gensim/models/fasttext_inner.pyx":445 * if word is None: * continue # leaving `effective_words` unchanged = shortening the sentence = expanding the window * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< @@ -5759,45 +5740,45 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":450 + /* "gensim/models/fasttext_inner.pyx":447 * if sample and word.sample_int < random_int32(&next_random): * continue * indexes[effective_words] = word.index # <<<<<<<<<<<<<< * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] */ - __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_14 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 447, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 450, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_t_14); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 447, __pyx_L1_error) __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; (__pyx_v_indexes[__pyx_v_effective_words]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":452 + /* "gensim/models/fasttext_inner.pyx":449 * indexes[effective_words] = word.index * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] # <<<<<<<<<<<<<< * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams_word); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_index2word); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_17 = PyObject_GetItem(__pyx_t_16, __pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_t_17); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; @@ -5805,9 +5786,9 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_17 = __pyx_t_13; __Pyx_INCREF(__pyx_t_17); __pyx_t_18 = 0; __pyx_t_19 = NULL; } else { - __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_18 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_19 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_19)) __PYX_ERR(0, 449, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; for (;;) { @@ -5815,17 +5796,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_17))) { if (__pyx_t_18 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 449, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } else { if (__pyx_t_18 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_18); __Pyx_INCREF(__pyx_t_13); __pyx_t_18++; if (unlikely(0 < 0)) __PYX_ERR(0, 449, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PySequence_ITEM(__pyx_t_17, __pyx_t_18); __pyx_t_18++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); #endif } @@ -5835,7 +5816,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 452, __pyx_L1_error) + else __PYX_ERR(0, 449, __pyx_L1_error) } break; } @@ -5843,48 +5824,48 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_XDECREF_SET(__pyx_v_subword_i, __pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_wv); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_ngrams); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 452, __pyx_L1_error) + __pyx_t_13 = PyObject_GetItem(__pyx_t_1, __pyx_v_subword_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 452, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_14, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 449, __pyx_L1_error) __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_subwords, ((PyObject*)__pyx_t_14)); __pyx_t_14 = 0; - /* "gensim/models/fasttext_inner.pyx":453 + /* "gensim/models/fasttext_inner.pyx":450 * * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array(subwords, dtype=np.uint32) # <<<<<<<<<<<<<< * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_14, __pyx_n_s_array); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(__pyx_v_subwords); __Pyx_GIVEREF(__pyx_v_subwords); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_v_subwords); - __pyx_t_13 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_uint32); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_t_13, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 453, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_13, __pyx_n_s_dtype, __pyx_t_16) < 0) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_14, __pyx_t_13); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 453, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_14, __pyx_t_13); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 450, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; @@ -5892,39 +5873,39 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __Pyx_XDECREF_SET(__pyx_v_word_subwords, __pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":454 + /* "gensim/models/fasttext_inner.pyx":451 * subwords = [model.wv.ngrams[subword_i] for subword_i in model.wv.ngrams_word[model.wv.index2word[word.index]]] * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) # <<<<<<<<<<<<<< * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 */ - __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 454, __pyx_L1_error) + __pyx_t_18 = PyList_GET_SIZE(__pyx_v_subwords); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 451, __pyx_L1_error) (__pyx_v_subwords_idx_len[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":455 + /* "gensim/models/fasttext_inner.pyx":452 * word_subwords = np.array(subwords, dtype=np.uint32) * subwords_idx_len[effective_words] = len(subwords) * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) # <<<<<<<<<<<<<< * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords */ - if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 455, __pyx_L1_error) + if (!(likely(((__pyx_v_word_subwords) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_subwords, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 452, __pyx_L1_error) (__pyx_v_subwords_idx[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_subwords))); - /* "gensim/models/fasttext_inner.pyx":457 + /* "gensim/models/fasttext_inner.pyx":454 * subwords_idx[effective_words] = np.PyArray_DATA(word_subwords) * # ensures reference count of word_subwords doesn't reach 0 * subword_arrays[effective_words] = word_subwords # <<<<<<<<<<<<<< * * if hs: */ - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 457, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 457, __pyx_L1_error) + if (unlikely(PyDict_SetItem(__pyx_v_subword_arrays, __pyx_t_16, __pyx_v_word_subwords) < 0)) __PYX_ERR(0, 454, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":459 + /* "gensim/models/fasttext_inner.pyx":456 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -5934,46 +5915,46 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":460 + /* "gensim/models/fasttext_inner.pyx":457 * * if hs: * codelens[effective_words] = len(word.code) # <<<<<<<<<<<<<< * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 460, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 457, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 460, __pyx_L1_error) + __pyx_t_18 = PyObject_Length(__pyx_t_16); if (unlikely(__pyx_t_18 == ((Py_ssize_t)-1))) __PYX_ERR(0, 457, __pyx_L1_error) __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; (__pyx_v_codelens[__pyx_v_effective_words]) = ((int)__pyx_t_18); - /* "gensim/models/fasttext_inner.pyx":461 + /* "gensim/models/fasttext_inner.pyx":458 * if hs: * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 461, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 458, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 461, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 458, __pyx_L1_error) (__pyx_v_codes[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":462 + /* "gensim/models/fasttext_inner.pyx":459 * codelens[effective_words] = len(word.code) * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: */ - __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 462, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 459, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); - if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 462, __pyx_L1_error) + if (!(likely(((__pyx_t_16) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_16, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 459, __pyx_L1_error) (__pyx_v_points[__pyx_v_effective_words]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_16))); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":459 + /* "gensim/models/fasttext_inner.pyx":456 * subword_arrays[effective_words] = word_subwords * * if hs: # <<<<<<<<<<<<<< @@ -5982,44 +5963,44 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":463 + /* "gensim/models/fasttext_inner.pyx":460 * codes[effective_words] = np.PyArray_DATA(word.code) * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 # <<<<<<<<<<<<<< * if effective_words == MAX_SENTENCE_LEN: - * break # TODO: log warning, tally overflow? + * break */ __pyx_v_effective_words = (__pyx_v_effective_words + 1); - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":461 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? + * break * */ __pyx_t_5 = ((__pyx_v_effective_words == 0x2710) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":465 + /* "gensim/models/fasttext_inner.pyx":462 * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: - * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * break # <<<<<<<<<<<<<< * * # keep track of which words go into which sentence, so we don't train */ goto __pyx_L12_break; - /* "gensim/models/fasttext_inner.pyx":464 + /* "gensim/models/fasttext_inner.pyx":461 * points[effective_words] = np.PyArray_DATA(word.point) * effective_words += 1 * if effective_words == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< - * break # TODO: log warning, tally overflow? + * break * */ } - /* "gensim/models/fasttext_inner.pyx":444 + /* "gensim/models/fasttext_inner.pyx":441 * if not sent: * continue # ignore empty sentences; leave effective_sentences unchanged * for token in sent: # <<<<<<<<<<<<<< @@ -6031,7 +6012,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_L12_break:; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/fasttext_inner.pyx":470 + /* "gensim/models/fasttext_inner.pyx":467 * # across sentence boundaries. * # indices of sentence number X are between tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_16)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 474, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (;;) { @@ -6172,17 +6153,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT if (likely(PyList_CheckExact(__pyx_t_16))) { if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 474, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 474, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } else { if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_16)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_16, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 474, __pyx_L1_error) #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 477, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(__pyx_t_16, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 474, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); #endif } @@ -6192,7 +6173,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 477, __pyx_L1_error) + else __PYX_ERR(0, 474, __pyx_L1_error) } break; } @@ -6203,17 +6184,17 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/fasttext_inner.pyx":478 + /* "gensim/models/fasttext_inner.pyx":475 * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on all sentences */ - __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 478, __pyx_L1_error) + __pyx_t_15 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_15 == ((npy_uint32)-1)) && PyErr_Occurred())) __PYX_ERR(0, 475, __pyx_L1_error) (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_15; - /* "gensim/models/fasttext_inner.pyx":477 + /* "gensim/models/fasttext_inner.pyx":474 * * # precompute "reduced window" offsets in a single randint() call * for i, item in enumerate(model.random.randint(0, window, effective_words)): # <<<<<<<<<<<<<< @@ -6223,7 +6204,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - /* "gensim/models/fasttext_inner.pyx":481 + /* "gensim/models/fasttext_inner.pyx":478 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6238,7 +6219,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT #endif /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":482 + /* "gensim/models/fasttext_inner.pyx":479 * # release GIL & train on all sentences * with nogil: * for sent_idx in range(effective_sentences): # <<<<<<<<<<<<<< @@ -6249,7 +6230,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_2; __pyx_t_20+=1) { __pyx_v_sent_idx = __pyx_t_20; - /* "gensim/models/fasttext_inner.pyx":483 + /* "gensim/models/fasttext_inner.pyx":480 * with nogil: * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] # <<<<<<<<<<<<<< @@ -6258,7 +6239,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_start = (__pyx_v_sentence_idx[__pyx_v_sent_idx]); - /* "gensim/models/fasttext_inner.pyx":484 + /* "gensim/models/fasttext_inner.pyx":481 * for sent_idx in range(effective_sentences): * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] # <<<<<<<<<<<<<< @@ -6267,7 +6248,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_idx_end = (__pyx_v_sentence_idx[(__pyx_v_sent_idx + 1)]); - /* "gensim/models/fasttext_inner.pyx":485 + /* "gensim/models/fasttext_inner.pyx":482 * idx_start = sentence_idx[sent_idx] * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): # <<<<<<<<<<<<<< @@ -6278,7 +6259,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT for (__pyx_t_22 = __pyx_v_idx_start; __pyx_t_22 < __pyx_t_21; __pyx_t_22+=1) { __pyx_v_i = __pyx_t_22; - /* "gensim/models/fasttext_inner.pyx":486 + /* "gensim/models/fasttext_inner.pyx":483 * idx_end = sentence_idx[sent_idx + 1] * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6287,7 +6268,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":484 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6297,7 +6278,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_j < __pyx_v_idx_start) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":488 + /* "gensim/models/fasttext_inner.pyx":485 * j = i - window + reduced_windows[i] * if j < idx_start: * j = idx_start # <<<<<<<<<<<<<< @@ -6306,7 +6287,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_j = __pyx_v_idx_start; - /* "gensim/models/fasttext_inner.pyx":487 + /* "gensim/models/fasttext_inner.pyx":484 * for i in range(idx_start, idx_end): * j = i - window + reduced_windows[i] * if j < idx_start: # <<<<<<<<<<<<<< @@ -6315,7 +6296,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":489 + /* "gensim/models/fasttext_inner.pyx":486 * if j < idx_start: * j = idx_start * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6324,7 +6305,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":487 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6334,7 +6315,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT __pyx_t_5 = ((__pyx_v_k > __pyx_v_idx_end) != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":491 + /* "gensim/models/fasttext_inner.pyx":488 * k = i + window + 1 - reduced_windows[i] * if k > idx_end: * k = idx_end # <<<<<<<<<<<<<< @@ -6343,7 +6324,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ __pyx_v_k = __pyx_v_idx_end; - /* "gensim/models/fasttext_inner.pyx":490 + /* "gensim/models/fasttext_inner.pyx":487 * j = idx_start * k = i + window + 1 - reduced_windows[i] * if k > idx_end: # <<<<<<<<<<<<<< @@ -6352,66 +6333,66 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT */ } - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":490 * k = idx_end * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * fast_sentence_cbow_hs( + * points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size,indexes, */ __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":494 + /* "gensim/models/fasttext_inner.pyx":491 * * if hs: - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, # <<<<<<<<<<<<<< - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) - * if negative: + * fast_sentence_cbow_hs( # <<<<<<<<<<<<<< + * points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size,indexes, + * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, */ __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":493 + /* "gensim/models/fasttext_inner.pyx":490 * k = idx_end * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + * fast_sentence_cbow_hs( + * points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size,indexes, */ } - /* "gensim/models/fasttext_inner.pyx":496 - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":495 + * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, + * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, - * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) + * next_random = fast_sentence_cbow_neg( + * negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, */ __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "gensim/models/fasttext_inner.pyx":497 - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":496 + * word_locks_ngrams) * if negative: - * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, # <<<<<<<<<<<<<< - * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) - * + * next_random = fast_sentence_cbow_neg( # <<<<<<<<<<<<<< + * negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, + * syn1neg, size, indexes, subwords_idx, subwords_idx_len, _alpha, work, i, j, k, */ __pyx_v_next_random = __pyx_f_6gensim_6models_14fasttext_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0_vocab, __pyx_v_syn0_ngrams, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_subwords_idx, __pyx_v_subwords_idx_len, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks_vocab, __pyx_v_word_locks_ngrams); - /* "gensim/models/fasttext_inner.pyx":496 - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, - * subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":495 + * subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, + * word_locks_ngrams) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, - * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) + * next_random = fast_sentence_cbow_neg( + * negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, */ } } } } - /* "gensim/models/fasttext_inner.pyx":481 + /* "gensim/models/fasttext_inner.pyx":478 * * # release GIL & train on all sentences * with nogil: # <<<<<<<<<<<<<< @@ -6430,21 +6411,21 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT } } - /* "gensim/models/fasttext_inner.pyx":500 - * subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) + /* "gensim/models/fasttext_inner.pyx":501 + * cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) * * return effective_words # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 500, __pyx_L1_error) + __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_effective_words); if (unlikely(!__pyx_t_16)) __PYX_ERR(0, 501, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_16); __pyx_r = __pyx_t_16; __pyx_t_16 = 0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":377 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6478,7 +6459,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_2train_batch_cbow(CYT return __pyx_r; } -/* "gensim/models/fasttext_inner.pyx":503 +/* "gensim/models/fasttext_inner.pyx":504 * * * def init(): # <<<<<<<<<<<<<< @@ -6517,7 +6498,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/fasttext_inner.pyx":512 + /* "gensim/models/fasttext_inner.pyx":513 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6527,7 +6508,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/fasttext_inner.pyx":513 + /* "gensim/models/fasttext_inner.pyx":514 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6537,7 +6518,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":514 + /* "gensim/models/fasttext_inner.pyx":515 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6546,7 +6527,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/fasttext_inner.pyx":515 + /* "gensim/models/fasttext_inner.pyx":516 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6555,7 +6536,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/fasttext_inner.pyx":520 + /* "gensim/models/fasttext_inner.pyx":521 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6565,7 +6546,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 0x3E8; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/fasttext_inner.pyx":521 + /* "gensim/models/fasttext_inner.pyx":522 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6574,7 +6555,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0x3E8)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/fasttext_inner.pyx":522 + /* "gensim/models/fasttext_inner.pyx":523 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6583,7 +6564,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/fasttext_inner.pyx":523 + /* "gensim/models/fasttext_inner.pyx":524 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6593,7 +6574,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14fasttext_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/fasttext_inner.pyx":526 + /* "gensim/models/fasttext_inner.pyx":527 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6602,7 +6583,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":527 + /* "gensim/models/fasttext_inner.pyx":528 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6611,7 +6592,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":529 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6621,7 +6602,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":529 + /* "gensim/models/fasttext_inner.pyx":530 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6630,7 +6611,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/fasttext_inner.pyx":530 + /* "gensim/models/fasttext_inner.pyx":531 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6639,7 +6620,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":531 + /* "gensim/models/fasttext_inner.pyx":532 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6651,7 +6632,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":528 + /* "gensim/models/fasttext_inner.pyx":529 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6660,7 +6641,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":533 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6670,7 +6651,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/fasttext_inner.pyx":533 + /* "gensim/models/fasttext_inner.pyx":534 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6679,7 +6660,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/fasttext_inner.pyx":534 + /* "gensim/models/fasttext_inner.pyx":535 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6688,7 +6669,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/fasttext_inner.pyx":535 + /* "gensim/models/fasttext_inner.pyx":536 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6700,7 +6681,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P __pyx_r = __pyx_int_1; goto __pyx_L0; - /* "gensim/models/fasttext_inner.pyx":532 + /* "gensim/models/fasttext_inner.pyx":533 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6709,7 +6690,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ } - /* "gensim/models/fasttext_inner.pyx":539 + /* "gensim/models/fasttext_inner.pyx":540 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6719,7 +6700,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P /*else*/ { __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/fasttext_inner.pyx":540 + /* "gensim/models/fasttext_inner.pyx":541 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6728,7 +6709,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/fasttext_inner.pyx":541 + /* "gensim/models/fasttext_inner.pyx":542 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6741,7 +6722,7 @@ static PyObject *__pyx_pf_6gensim_6models_14fasttext_inner_4init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/fasttext_inner.pyx":503 + /* "gensim/models/fasttext_inner.pyx":504 * * * def init(): # <<<<<<<<<<<<<< @@ -9404,7 +9385,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_fasttext_inner_pyx, __pyx_k_fasttext_inner_pyx, sizeof(__pyx_k_fasttext_inner_pyx), 0, 0, 1, 0}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, - {&__pyx_n_s_gensim, __pyx_k_gensim, sizeof(__pyx_k_gensim), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_fasttext_inner, __pyx_k_gensim_models_fasttext_inner, sizeof(__pyx_k_gensim_models_fasttext_inner), 0, 0, 1, 1}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, @@ -9422,7 +9402,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_l1_2, __pyx_k_l1_2, sizeof(__pyx_k_l1_2), 0, 0, 1, 1}, {&__pyx_n_s_layer1_size, __pyx_k_layer1_size, sizeof(__pyx_k_layer1_size), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_matutils, __pyx_k_matutils, sizeof(__pyx_k_matutils), 0, 0, 1, 1}, {&__pyx_n_s_model, __pyx_k_model, sizeof(__pyx_k_model), 0, 0, 1, 1}, {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, @@ -9472,7 +9451,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, - {&__pyx_n_s_word2vec, __pyx_k_word2vec, sizeof(__pyx_k_word2vec), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_ngrams, __pyx_k_word_locks_ngrams, sizeof(__pyx_k_word_locks_ngrams), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_vocab, __pyx_k_word_locks_vocab, sizeof(__pyx_k_word_locks_vocab), 0, 0, 1, 1}, {&__pyx_n_s_word_subwords, __pyx_k_word_subwords, sizeof(__pyx_k_word_subwords), 0, 0, 1, 1}, @@ -9484,9 +9462,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 22, __pyx_L1_error) - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 67, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 355, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(0, 18, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 62, __pyx_L1_error) + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 346, __pyx_L1_error) __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 235, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 823, __pyx_L1_error) return 0; @@ -9498,31 +9476,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/fasttext_inner.pyx":309 + /* "gensim/models/fasttext_inner.pyx":300 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/fasttext_inner.pyx":432 + /* "gensim/models/fasttext_inner.pyx":429 * cum_table_len = len(model.cum_table) * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 432, __pyx_L1_error) + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9623,41 +9601,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - /* "gensim/models/fasttext_inner.pyx":256 + /* "gensim/models/fasttext_inner.pyx":247 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 256, __pyx_L1_error) + __pyx_tuple__15 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_l1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_l1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 247, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 256, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 256, __pyx_L1_error) + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(5, 0, 47, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 247, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":377 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(48, __pyx_n_s_model, __pyx_n_s_sentences, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0_vocab, __pyx_n_s_word_locks_vocab, __pyx_n_s_syn0_ngrams, __pyx_n_s_word_locks_ngrams, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_idx, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_effective_words, __pyx_n_s_effective_sentences, __pyx_n_s_sent_idx, __pyx_n_s_idx_start, __pyx_n_s_idx_end, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_subwords_idx_len, __pyx_n_s_subwords_idx, __pyx_n_s_subword_arrays, __pyx_n_s_neu1_2, __pyx_n_s_vlookup, __pyx_n_s_sent, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_subwords, __pyx_n_s_word_subwords, __pyx_n_s_item, __pyx_n_s_subword_i); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 380, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(5, 0, 48, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_train_batch_cbow, 377, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) __PYX_ERR(0, 377, __pyx_L1_error) - /* "gensim/models/fasttext_inner.pyx":503 + /* "gensim/models/fasttext_inner.pyx":504 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 503, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(0, 504, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 503, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 503, __pyx_L1_error) + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_fasttext_inner_pyx, __pyx_n_s_init, 504, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 504, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9876,27 +9854,6 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/models/fasttext_inner.pyx":16 - * from libc.stdio cimport printf - * from libc.stdlib cimport calloc, free - * from gensim import matutils # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_n_s_matutils); - __Pyx_GIVEREF(__pyx_n_s_matutils); - PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_matutils); - __pyx_t_4 = __Pyx_Import(__pyx_n_s_gensim, __pyx_t_3, -1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_4, __pyx_n_s_matutils); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_matutils, __pyx_t_3) < 0) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "gensim/models/fasttext_inner.pyx":20 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< @@ -9906,34 +9863,34 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) { __Pyx_PyThreadState_declare __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7); + __Pyx_ExceptionSave(&__pyx_t_4, &__pyx_t_5, &__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_4); __Pyx_XGOTREF(__pyx_t_5); __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_7); /*try:*/ { - /* "gensim/models/fasttext_inner.pyx":21 + /* "gensim/models/fasttext_inner.pyx":17 * # scipy <= 0.15 * try: * from scipy.linalg.blas import fblas # <<<<<<<<<<<<<< * except ImportError: * # in scipy > 0.15, fblas function has been removed */ - __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyList_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L2_error) + __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_n_s_fblas); __Pyx_GIVEREF(__pyx_n_s_fblas); - PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_fblas); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_4, -1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L2_error) + PyList_SET_ITEM(__pyx_t_3, 0, __pyx_n_s_fblas); + __pyx_t_7 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_3, -1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 17, __pyx_L2_error) + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_ImportFrom(__pyx_t_7, __pyx_n_s_fblas); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L2_error) __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_fblas); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_4) < 0) __PYX_ERR(0, 21, __pyx_L2_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_3) < 0) __PYX_ERR(0, 17, __pyx_L2_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "gensim/models/fasttext_inner.pyx":20 + /* "gensim/models/fasttext_inner.pyx":16 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< @@ -9941,15 +9898,15 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) * except ImportError: */ } + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; goto __pyx_L7_try_end; __pyx_L2_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "gensim/models/fasttext_inner.pyx":22 + /* "gensim/models/fasttext_inner.pyx":18 * try: * from scipy.linalg.blas import fblas * except ImportError: # <<<<<<<<<<<<<< @@ -9959,93 +9916,72 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __pyx_t_8 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_ImportError); if (__pyx_t_8) { __Pyx_AddTraceback("gensim.models.fasttext_inner", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_3, &__pyx_t_4, &__pyx_t_9) < 0) __PYX_ERR(0, 22, __pyx_L4_except_error) + if (__Pyx_GetException(&__pyx_t_7, &__pyx_t_3, &__pyx_t_9) < 0) __PYX_ERR(0, 18, __pyx_L4_except_error) + __Pyx_GOTREF(__pyx_t_7); __Pyx_GOTREF(__pyx_t_3); - __Pyx_GOTREF(__pyx_t_4); __Pyx_GOTREF(__pyx_t_9); - /* "gensim/models/fasttext_inner.pyx":24 + /* "gensim/models/fasttext_inner.pyx":20 * except ImportError: * # in scipy > 0.15, fblas function has been removed * import scipy.linalg.blas as fblas # <<<<<<<<<<<<<< * * from word2vec_inner cimport bisect_left, random_int32, \ */ - __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 24, __pyx_L4_except_error) + __pyx_t_10 = PyList_New(1); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_INCREF(__pyx_n_s__14); __Pyx_GIVEREF(__pyx_n_s__14); PyList_SET_ITEM(__pyx_t_10, 0, __pyx_n_s__14); - __pyx_t_11 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_10, -1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 24, __pyx_L4_except_error) + __pyx_t_11 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_10, -1); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_11) < 0) __PYX_ERR(0, 24, __pyx_L4_except_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_11) < 0) __PYX_ERR(0, 20, __pyx_L4_except_error) __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L3_exception_handled; } goto __pyx_L4_except_error; __pyx_L4_except_error:; - /* "gensim/models/fasttext_inner.pyx":20 + /* "gensim/models/fasttext_inner.pyx":16 * * # scipy <= 0.15 * try: # <<<<<<<<<<<<<< * from scipy.linalg.blas import fblas * except ImportError: */ + __Pyx_XGIVEREF(__pyx_t_4); __Pyx_XGIVEREF(__pyx_t_5); __Pyx_XGIVEREF(__pyx_t_6); - __Pyx_XGIVEREF(__pyx_t_7); - __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_4, __pyx_t_5, __pyx_t_6); goto __pyx_L1_error; __pyx_L3_exception_handled:; + __Pyx_XGIVEREF(__pyx_t_4); __Pyx_XGIVEREF(__pyx_t_5); __Pyx_XGIVEREF(__pyx_t_6); - __Pyx_XGIVEREF(__pyx_t_7); - __Pyx_ExceptionReset(__pyx_t_5, __pyx_t_6, __pyx_t_7); + __Pyx_ExceptionReset(__pyx_t_4, __pyx_t_5, __pyx_t_6); __pyx_L7_try_end:; } - /* "gensim/models/fasttext_inner.pyx":32 + /* "gensim/models/fasttext_inner.pyx":28 * our_dot_double, our_dot_float, our_dot_noblas, our_saxpy_noblas * * REAL = np.float32 # <<<<<<<<<<<<<< * * DEF MAX_SENTENCE_LEN = 10000 */ - __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 32, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 32, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_4) < 0) __PYX_ERR(0, 32, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "gensim/models/fasttext_inner.pyx":36 - * DEF MAX_SENTENCE_LEN = 10000 - * DEF MAX_SUBWORDS = 1000 - * from word2vec import FAST_VERSION # <<<<<<<<<<<<<< - * - * DEF EXP_TABLE_SIZE = 1000 - */ - __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_n_s_FAST_VERSION); - __Pyx_GIVEREF(__pyx_n_s_FAST_VERSION); - PyList_SET_ITEM(__pyx_t_4, 0, __pyx_n_s_FAST_VERSION); - __pyx_t_9 = __Pyx_Import(__pyx_n_s_word2vec, __pyx_t_4, -1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 36, __pyx_L1_error) + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 28, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_ImportFrom(__pyx_t_9, __pyx_n_s_FAST_VERSION); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_4) < 0) __PYX_ERR(0, 36, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_3) < 0) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":44 + /* "gensim/models/fasttext_inner.pyx":39 * cdef REAL_t[EXP_TABLE_SIZE] LOG_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -10054,7 +9990,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_14fasttext_inner_ONE = 1; - /* "gensim/models/fasttext_inner.pyx":45 + /* "gensim/models/fasttext_inner.pyx":40 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< @@ -10063,87 +9999,87 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) */ __pyx_v_6gensim_6models_14fasttext_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/fasttext_inner.pyx":256 + /* "gensim/models/fasttext_inner.pyx":247 * * * def train_batch_sg(model, sentences, alpha, _work, _l1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 256, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_9) < 0) __PYX_ERR(0, 256, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_1train_batch_sg, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_sg, __pyx_t_3) < 0) __PYX_ERR(0, 247, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":380 + /* "gensim/models/fasttext_inner.pyx":377 * * * def train_batch_cbow(model, sentences, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 380, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_9) < 0) __PYX_ERR(0, 380, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_3train_batch_cbow, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 377, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_batch_cbow, __pyx_t_3) < 0) __PYX_ERR(0, 377, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":503 + /* "gensim/models/fasttext_inner.pyx":504 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_9 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 503, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_9) < 0) __PYX_ERR(0, 503, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14fasttext_inner_5init, NULL, __pyx_n_s_gensim_models_fasttext_inner); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 504, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_3) < 0) __PYX_ERR(0, 504, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":543 + /* "gensim/models/fasttext_inner.pyx":544 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 543, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_3 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_3)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - __Pyx_INCREF(__pyx_t_3); + __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 544, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_9); + __pyx_t_7 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_9))) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_9); + if (likely(__pyx_t_7)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_9); + __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_4, function); + __Pyx_DECREF_SET(__pyx_t_9, function); } } - if (__pyx_t_3) { - __pyx_t_9 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 543, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_7) { + __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 544, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { - __pyx_t_9 = __Pyx_PyObject_CallNoArg(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 543, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_CallNoArg(__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 544, __pyx_L1_error) } - __Pyx_GOTREF(__pyx_t_9); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_9) < 0) __PYX_ERR(0, 543, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_3) < 0) __PYX_ERR(0, 544, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/fasttext_inner.pyx":544 + /* "gensim/models/fasttext_inner.pyx":545 * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< */ - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 544, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 545, __pyx_L1_error) /* "gensim/models/fasttext_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_9 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_9) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "../../../../../virtualenvironments/gensim/fasttext_2.7/local/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 * raise ImportError("numpy.core.umath failed to import") @@ -10160,7 +10096,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_7); __Pyx_XDECREF(__pyx_t_9); __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_11); diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 4dd1f3df27..0020c5a4d6 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -11,10 +11,6 @@ cimport numpy as np from libc.math cimport exp from libc.math cimport log from libc.string cimport memset -from libc.stdio cimport printf -from libc.stdlib cimport calloc, free -from gensim import matutils - # scipy <= 0.15 try: @@ -33,7 +29,6 @@ REAL = np.float32 DEF MAX_SENTENCE_LEN = 10000 DEF MAX_SUBWORDS = 1000 -from word2vec import FAST_VERSION DEF EXP_TABLE_SIZE = 1000 DEF MAX_EXP = 6 @@ -152,17 +147,16 @@ cdef unsigned long long fast_sentence_cbow_neg( for m in range(j, k): if m == i: continue - else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + for d in range(subwords_idx_len[m]): count += ONEF - our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - for d in range(subwords_idx_len[m]): - count += ONEF - our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) if count > (0.5): - inv_count = ONEF/count + inv_count = ONEF / count if cbow_mean: - sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + sscal(&size, &inv_count, neu1, &ONE) memset(work, 0, size * cython.sizeof(REAL_t)) @@ -170,12 +164,11 @@ cdef unsigned long long fast_sentence_cbow_neg( if d == 0: target_index = word_index label = ONEF - else: - target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 + target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + next_random = (next_random * 25214903917ULL + 11) & modulo + if target_index == word_index: + continue + label = 0.0 row2 = target_index * size f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) @@ -188,7 +181,7 @@ cdef unsigned long long fast_sentence_cbow_neg( our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) if not cbow_mean: # divide error over summed window vectors - sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + sscal(&size, &inv_count, work, &ONE) for m in range(j,k): if m == i: @@ -218,16 +211,15 @@ cdef void fast_sentence_cbow_hs( for m in range(j, k): if m == i: continue - else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) + for d in range(subwords_idx_len[m]): count += ONEF - our_saxpy(&size, &ONEF, &syn0_vocab[indexes[m] * size], &ONE, neu1, &ONE) - for d in range(subwords_idx_len[m]): - count += ONEF - our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0_ngrams[subwords_idx[m][d] * size], &ONE, neu1, &ONE) if count > (0.5): - inv_count = ONEF/count + inv_count = ONEF / count if cbow_mean: - sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + sscal(&size, &inv_count, neu1, &ONE) memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelens[i]): @@ -242,15 +234,14 @@ cdef void fast_sentence_cbow_hs( our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) if not cbow_mean: # divide error over summed window vectors - sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + sscal(&size, &inv_count, work, &ONE) for m in range(j,k): if m == i: continue - else: - our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - for d in range(subwords_idx_len[m]): - our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + for d in range(subwords_idx_len[m]): + our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) def train_batch_sg(model, sentences, alpha, _work, _l1): @@ -340,7 +331,7 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): effective_words += 1 if effective_words == MAX_SENTENCE_LEN: - break # TODO: log warning, tally overflow? + break # keep track of which words go into which sentence, so we don't train # across sentence boundaries. @@ -349,7 +340,7 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): sentence_idx[effective_sentences] = effective_words if effective_words == MAX_SENTENCE_LEN: - break # TODO: log warning, tally overflow? + break # precompute "reduced window" offsets in a single randint() call for i, item in enumerate(model.random.randint(0, window, effective_words)): @@ -370,9 +361,15 @@ def train_batch_sg(model, sentences, alpha, _work, _l1): if j == i: continue if hs: - fast_sentence_sg_hs(points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, word_locks_ngrams) + fast_sentence_sg_hs( + points[j], codes[j], codelens[j], syn0_vocab, syn0_ngrams, syn1, size, + subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, word_locks_vocab, + word_locks_ngrams) if negative: - next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, next_random, word_locks_vocab, word_locks_ngrams) + next_random = fast_sentence_sg_neg( + negative, cum_table, cum_table_len, syn0_vocab, syn0_ngrams, syn1neg, size, + indexes[j], subwords_idx[i], subwords_idx_len[i], _alpha, work, l1, + next_random, word_locks_vocab, word_locks_ngrams) return effective_words @@ -462,7 +459,7 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): points[effective_words] = np.PyArray_DATA(word.point) effective_words += 1 if effective_words == MAX_SENTENCE_LEN: - break # TODO: log warning, tally overflow? + break # keep track of which words go into which sentence, so we don't train # across sentence boundaries. @@ -471,7 +468,7 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): sentence_idx[effective_sentences] = effective_words if effective_words == MAX_SENTENCE_LEN: - break # TODO: log warning, tally overflow? + break # precompute "reduced window" offsets in a single randint() call for i, item in enumerate(model.random.randint(0, window, effective_words)): @@ -491,11 +488,15 @@ def train_batch_cbow(model, sentences, alpha, _work, _neu1): k = idx_end if hs: - fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size, indexes,subwords_idx, - subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, word_locks_ngrams) + fast_sentence_cbow_hs( + points[i], codes[i], codelens, neu1, syn0_vocab, syn0_ngrams, syn1, size,indexes, + subwords_idx,subwords_idx_len,_alpha, work, i, j, k, cbow_mean, word_locks_vocab, + word_locks_ngrams) if negative: - next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, syn1neg, size, indexes, subwords_idx, - subwords_idx_len, _alpha, work, i, j, k, cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) + next_random = fast_sentence_cbow_neg( + negative, cum_table, cum_table_len, codelens, neu1, syn0_vocab, syn0_ngrams, + syn1neg, size, indexes, subwords_idx, subwords_idx_len, _alpha, work, i, j, k, + cbow_mean, next_random, word_locks_vocab, word_locks_ngrams) return effective_words From 5a4e627bea382ed5f39c2b4c1118605a0d77abaa Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 1 Dec 2017 14:47:15 +0530 Subject: [PATCH 13/22] minor: removes redundant `else` --- gensim/models/fasttext_inner.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 0020c5a4d6..5efae09433 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -186,10 +186,9 @@ cdef unsigned long long fast_sentence_cbow_neg( for m in range(j,k): if m == i: continue - else: - our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - for d in range(subwords_idx_len[m]): - our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + for d in range(subwords_idx_len[m]): + our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) return next_random @@ -543,3 +542,4 @@ def init(): FAST_VERSION = init() # initialize the module MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN + From b471fde9c60138e5cea4349006dda906d7384ad1 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 4 Dec 2017 03:54:15 +0530 Subject: [PATCH 14/22] adds docstring --- gensim/models/fasttext.py | 178 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index c5674f1a06..f32506abb6 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -1,5 +1,27 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +# +# Copyright (C) 2013 Radim Rehurek + +""" +Learn word representations via fasttext's "skip-gram and CBOW models", using either +hierarchical softmax or negative sampling [1]_. + +NOTE: There are more ways to get word vectors in Gensim than just FastText. +See wrappers for VarEmbed and WordRank or Word2Vec + +This module allows training a word embedding from a training corpus with the additional ability +to obtain word vectors for out-of-vocabulary words. + +For a tutorial on gensim's native fasttext, refer the noteboook -- +https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb + +**Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** + +.. [1] P. Bojanowski, E. Grave, A. Joulin, T. Mikolov + Enriching Word Vectors with Subword Information. In arXiv preprint arXiv:1607.04606. + +""" import logging @@ -24,6 +46,17 @@ MAX_WORDS_IN_BATCH = 10000 def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): + """ + Update CBOW model by training on a sequence of sentences. + + Each sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from `FastText.train()`. + + This is the non-optimized, Python version. If you have cython installed, gensim + will use the optimized version from fasttext_inner instead. + + """ + result = 0 for sentence in sentences: word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and @@ -58,6 +91,17 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): return result def train_batch_sg(model, sentences, alpha, work=None, neu1=None): + """ + Update skip-gram model by training on a sequence of sentences. + + Each sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from `FastText.train()`. + + This is the non-optimized, Python version. If you have cython installed, gensim + will use the optimized version from fasttext_inner instead. + + """ + result = 0 for sentence in sentences: word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and @@ -82,11 +126,97 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): class FastText(Word2Vec): + """ + Class for training, using and evaluating word representations learned using method described in + `Enriching Word Vectors with Subword Information` aka Fasttext. + + The model can be stored/loaded via its `save()` and `load()` methods, or loaded in a format + compatible with the original fasttext implementation via `load_fasttext_format()`. + + """ + def __init__( self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): + """ + Initialize the model from an iterable of `sentences`. Each sentence is a + list of words (unicode strings) that will be used for training. + + The `sentences` iterable can be simply a list, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :Word2Vec:`BrownCorpus`, :Word2Vec:`Text8Corpus` or :Word2Vec:`LineSentence` in + this module for such examples. + + If you don't supply `sentences`, the model is left uninitialized -- use if + you plan to initialize it in some other way. + + `sg` defines the training algorithm. By default (`sg=0`), CBOW is used. + Otherwise (`sg=1`), skip-gram is employed. + + `size` is the dimensionality of the feature vectors. + + `window` is the maximum distance between the current and predicted word within a sentence. + + `alpha` is the initial learning rate (will linearly drop to `min_alpha` as training progresses). + + `seed` = for the random number generator. Initial vectors for each + word are seeded with a hash of the concatenation of word + str(seed). + Note that for a fully deterministically-reproducible run, you must also limit the model to + a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python + 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED + environment variable to control hash randomization.) + + `min_count` = ignore all words with total frequency lower than this. + + `max_vocab_size` = limit RAM during vocabulary building; if there are more unique + words than this, then prune the infrequent ones. Every 10 million word types + need about 1GB of RAM. Set to `None` for no limit (default). + + `sample` = threshold for configuring which higher-frequency words are randomly downsampled; + default is 1e-3, useful range is (0, 1e-5). + + `workers` = use this many worker threads to train the model (=faster training with multicore machines). + + `hs` = if 1, hierarchical softmax will be used for model training. + If set to 0 (default), and `negative` is non-zero, negative sampling will be used. + + `negative` = if > 0, negative sampling will be used, the int for negative + specifies how many "noise words" should be drawn (usually between 5-20). + Default is 5. If set to 0, no negative samping is used. + + `cbow_mean` = if 0, use the sum of the context word vectors. If 1 (default), use the mean. + Only applies when cbow is used. + + `hashfxn` = hash function to use to randomly initialize weights, for increased + training reproducibility. Default is Python's rudimentary built in hash function. + + `iter` = number of iterations (epochs) over the corpus. Default is 5. + + `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain + in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and + returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. + Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + of the model. + + `sorted_vocab` = if 1 (default), sort the vocabulary by descending frequency before + assigning word indexes. + + `batch_words` = target size (in words) for batches of examples passed to worker threads (and + thus cython routines). Default is 10000. (Larger batches will be passed if individual + texts are longer than 10000 words, but the standard cython code truncates to that maximum.) + + `min_n` = min length of char ngrams to be used for training word representations. Default is 3. + + `max_n` = max length of char ngrams to be used for training word representations. Set `max_n` to be + lesser than `min_n` to avoid char ngrams being used. Default is 6. + + `bucket` = Word and character ngram features are hashed into a fixed number of buckets, in order to limit the + memory usage of the model. This option specifies the number of buckets used by the model. Default: 2000000 + + """ # fastText specific params self.bucket = bucket @@ -108,6 +238,11 @@ def initialize_word_vectors(self): self.wv.max_n = self.max_n def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): + """ + Build vocabulary from a sequence of sentences (can be a once-only generator stream). + Each sentence must be a list of unicode strings. + """ + if update: if not len(self.wv.vocab): raise RuntimeError( @@ -122,6 +257,13 @@ def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_ self.init_ngrams(update=update) def init_ngrams(self, update=False): + """ + Computes ngrams of all words present in vocabulary and stores vectors for only those ngrams. + Vectors for other ngrams are initialized with a random uniform distribution in FastText. + If `update` is `True`, the new vocab words and their new ngrams word vectors are initialized + with random uniform distribution and updated/added to the exisiting vocab word and ngram vectors. + """ + if not update: self.wv.ngrams = {} self.wv.syn0_vocab = empty((len(self.wv.vocab), self.vector_size), dtype=REAL) @@ -197,6 +339,8 @@ def init_ngrams(self, update=False): self.syn0_ngrams_lockf = vstack([self.syn0_ngrams_lockf, new_ngram_lockf_rows]) def reset_ngram_weights(self): + """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary and their ngrams.""" + rand_obj = np.random rand_obj.seed(self.seed) for index in range(len(self.wv.vocab)): @@ -205,6 +349,11 @@ def reset_ngram_weights(self): self.wv.syn0_ngrams[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size) def _do_train_job(self, sentences, alpha, inits): + """ + Train a single batch of sentences. Return 2-tuple `(effective word count after + ignoring unknown words and sentence length trimming, total word count)`. + """ + work, neu1 = inits tally = 0 if self.sg: @@ -217,6 +366,21 @@ def _do_train_job(self, sentences, alpha, inits): def train(self, sentences, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0): + """ + Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). + For FastText, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) + + To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate + progres-percentage logging, either total_examples (count of sentences) or total_words (count of + raw words in sentences) MUST be provided. (If the corpus is the same as was provided to + `build_vocab()`, the count of examples in that corpus will be available in the model's + `corpus_count` property.) + + To avoid common mistakes around the model's ability to do multiple training passes itself, an + explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` + is only called once, the model's cached `iter` value should be supplied as `epochs` value. + """ + self.neg_labels = [] if self.negative > 0: # precompute negative labels optimization for pure-python training @@ -241,6 +405,20 @@ def get_vocab_word_vecs(self): self.wv.syn0[v.index] = word_vec def word_vec(self, word, use_norm=False): + """ + Accept a single word as input. + Returns the word's representations in vector space, as a 1D numpy array. + + The word can be out-of-vocabulary as long as ngrams for the word are present. + For words with all ngrams absent, a KeyError is raised. + + Example:: + + >>> trained_model['office'] + array([ -1.40128313e-02, ...]) + + """ + return FastTextKeyedVectors.word_vec(self.wv, word, use_norm=use_norm) @classmethod From eee20ef34ade842e05d65ec75f20e02581736cdf Mon Sep 17 00:00:00 2001 From: manneshiva Date: Tue, 5 Dec 2017 03:40:51 +0530 Subject: [PATCH 15/22] changes docstrings style, splits long lines --- gensim/models/fasttext.py | 73 ++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index f32506abb6..afae14756a 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -1,10 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# -# Copyright (C) 2013 Radim Rehurek +# Authors: Chinmaya Pancholi , Shiva Manne +# Copyright (C) 2017 RaRe Technologies s.r.o. -""" -Learn word representations via fasttext's "skip-gram and CBOW models", using either +"""Learn word representations via fasttext's "skip-gram and CBOW models", using either hierarchical softmax or negative sampling [1]_. NOTE: There are more ways to get word vectors in Gensim than just FastText. @@ -13,7 +12,7 @@ This module allows training a word embedding from a training corpus with the additional ability to obtain word vectors for out-of-vocabulary words. -For a tutorial on gensim's native fasttext, refer the noteboook -- +For a tutorial on gensim's native fasttext, refer to the noteboook -- https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb **Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** @@ -46,8 +45,7 @@ MAX_WORDS_IN_BATCH = 10000 def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): - """ - Update CBOW model by training on a sequence of sentences. + """Update CBOW model by training on a sequence of sentences. Each sentence is a list of string tokens, which are looked up in the model's vocab dictionary. Called internally from `FastText.train()`. @@ -56,7 +54,6 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): will use the optimized version from fasttext_inner instead. """ - result = 0 for sentence in sentences: word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and @@ -86,13 +83,13 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean: l1 /= (len(subwords_indices[0]) + len(subwords_indices[1])) - train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True) # train on the sliding window for target word + # train on the sliding window for target word + train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True) result += len(word_vocabs) return result def train_batch_sg(model, sentences, alpha, work=None, neu1=None): - """ - Update skip-gram model by training on a sequence of sentences. + """Update skip-gram model by training on a sequence of sentences. Each sentence is a list of string tokens, which are looked up in the model's vocab dictionary. Called internally from `FastText.train()`. @@ -101,7 +98,6 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): will use the optimized version from fasttext_inner instead. """ - result = 0 for sentence in sentences: word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and @@ -126,22 +122,19 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): class FastText(Word2Vec): - """ - Class for training, using and evaluating word representations learned using method described in + """Class for training, using and evaluating word representations learned using method described in `Enriching Word Vectors with Subword Information` aka Fasttext. The model can be stored/loaded via its `save()` and `load()` methods, or loaded in a format compatible with the original fasttext implementation via `load_fasttext_format()`. """ - def __init__( self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, - trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): - """ - Initialize the model from an iterable of `sentences`. Each sentence is a + negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, + bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): + """Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. The `sentences` iterable can be simply a list, but for larger corpora, @@ -216,8 +209,15 @@ def __init__( `bucket` = Word and character ngram features are hashed into a fixed number of buckets, in order to limit the memory usage of the model. This option specifies the number of buckets used by the model. Default: 2000000 - """ + Example + ------- + Initialize and train a `FastText` model: + >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) + >>> model.build_vocab(sentences) + >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) + + """ # fastText specific params self.bucket = bucket self.word_ngrams = word_ngrams @@ -238,11 +238,9 @@ def initialize_word_vectors(self): self.wv.max_n = self.max_n def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): - """ - Build vocabulary from a sequence of sentences (can be a once-only generator stream). + """Build vocabulary from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. """ - if update: if not len(self.wv.vocab): raise RuntimeError( @@ -257,13 +255,11 @@ def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_ self.init_ngrams(update=update) def init_ngrams(self, update=False): - """ - Computes ngrams of all words present in vocabulary and stores vectors for only those ngrams. + """Computes ngrams of all words present in vocabulary and stores vectors for only those ngrams. Vectors for other ngrams are initialized with a random uniform distribution in FastText. If `update` is `True`, the new vocab words and their new ngrams word vectors are initialized with random uniform distribution and updated/added to the exisiting vocab word and ngram vectors. """ - if not update: self.wv.ngrams = {} self.wv.syn0_vocab = empty((len(self.wv.vocab), self.vector_size), dtype=REAL) @@ -339,21 +335,21 @@ def init_ngrams(self, update=False): self.syn0_ngrams_lockf = vstack([self.syn0_ngrams_lockf, new_ngram_lockf_rows]) def reset_ngram_weights(self): - """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary and their ngrams.""" - + """Reset all projection weights to an initial (untrained) state, but keep the + existing vocabulary and their ngrams.""" rand_obj = np.random rand_obj.seed(self.seed) for index in range(len(self.wv.vocab)): - self.wv.syn0_vocab[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size) + self.wv.syn0_vocab[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, + self.vector_size) for index in range(len(self.wv.hash2index)): - self.wv.syn0_ngrams[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size) + self.wv.syn0_ngrams[index] = rand_obj.uniform(-1.0 / self.vector_size, 1.0 / self.vector_size, + self.vector_size) def _do_train_job(self, sentences, alpha, inits): - """ - Train a single batch of sentences. Return 2-tuple `(effective word count after + """Train a single batch of sentences. Return 2-tuple `(effective word count after ignoring unknown words and sentence length trimming, total word count)`. """ - work, neu1 = inits tally = 0 if self.sg: @@ -366,8 +362,7 @@ def _do_train_job(self, sentences, alpha, inits): def train(self, sentences, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0): - """ - Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). + """Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). For FastText, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate @@ -380,7 +375,6 @@ def train(self, sentences, total_examples=None, total_words=None, explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` is only called once, the model's cached `iter` value should be supplied as `epochs` value. """ - self.neg_labels = [] if self.negative > 0: # precompute negative labels optimization for pure-python training @@ -405,20 +399,19 @@ def get_vocab_word_vecs(self): self.wv.syn0[v.index] = word_vec def word_vec(self, word, use_norm=False): - """ - Accept a single word as input. + """Accept a single word as input. Returns the word's representations in vector space, as a 1D numpy array. The word can be out-of-vocabulary as long as ngrams for the word are present. For words with all ngrams absent, a KeyError is raised. - Example:: + Example + ------- >>> trained_model['office'] array([ -1.40128313e-02, ...]) """ - return FastTextKeyedVectors.word_vec(self.wv, word, use_norm=use_norm) @classmethod From 1b3e2d3cc8037b978907ba4e46953d15e64e6d57 Mon Sep 17 00:00:00 2001 From: Menshikh Ivan Date: Tue, 5 Dec 2017 13:48:34 +0500 Subject: [PATCH 16/22] fix references in fasttext docstring --- gensim/models/fasttext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 67ceee0cd6..5c9b55b23b 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -139,8 +139,8 @@ def __init__( The `sentences` iterable can be simply a list, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. - See :Word2Vec:`BrownCorpus`, :Word2Vec:`Text8Corpus` or :Word2Vec:`LineSentence` in - this module for such examples. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in this module for such examples. If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. From 81eebfb950c36565055753bb7cd8e40d4f040997 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Tue, 5 Dec 2017 19:33:21 +0530 Subject: [PATCH 17/22] adds deleted else in cbow-neg --- gensim/models/fasttext_inner.c | 172 ++++++++++++++++--------------- gensim/models/fasttext_inner.pyx | 11 +- 2 files changed, 94 insertions(+), 89 deletions(-) diff --git a/gensim/models/fasttext_inner.c b/gensim/models/fasttext_inner.c index 45b6875645..46403f6ab5 100644 --- a/gensim/models/fasttext_inner.c +++ b/gensim/models/fasttext_inner.c @@ -2753,7 +2753,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< * label = ONEF - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * else: */ __pyx_v_target_index = __pyx_v_word_index; @@ -2761,8 +2761,8 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) */ __pyx_v_label = __pyx_v_6gensim_6models_14fasttext_inner_ONEF; @@ -2773,65 +2773,69 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente * target_index = word_index * label = ONEF */ + goto __pyx_L12; } - /* "gensim/models/fasttext_inner.pyx":167 - * target_index = word_index - * label = ONEF - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":168 * label = ONEF - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); + /*else*/ { + __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/fasttext_inner.pyx":169 - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":169 + * else: + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == word_index: + * continue */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); /* "gensim/models/fasttext_inner.pyx":170 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_3) { + + /* "gensim/models/fasttext_inner.pyx":171 + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: + * continue # <<<<<<<<<<<<<< + * label = 0.0 * */ - goto __pyx_L10_continue; + goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":169 - * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":170 + * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 */ - } + } - /* "gensim/models/fasttext_inner.pyx":171 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":172 + * if target_index == word_index: + * continue + * label = 0.0 # <<<<<<<<<<<<<< * * row2 = target_index * size */ - __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + __pyx_v_label = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); + } + __pyx_L12:; - /* "gensim/models/fasttext_inner.pyx":173 - * label = 0.0 + /* "gensim/models/fasttext_inner.pyx":174 + * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) @@ -2839,7 +2843,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/fasttext_inner.pyx":174 + /* "gensim/models/fasttext_inner.pyx":175 * * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2848,7 +2852,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f_dot = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":175 + /* "gensim/models/fasttext_inner.pyx":176 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2866,7 +2870,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":176 + /* "gensim/models/fasttext_inner.pyx":177 * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2875,7 +2879,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ goto __pyx_L10_continue; - /* "gensim/models/fasttext_inner.pyx":175 + /* "gensim/models/fasttext_inner.pyx":176 * row2 = target_index * size * f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2884,7 +2888,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":177 + /* "gensim/models/fasttext_inner.pyx":178 * if f_dot <= -MAX_EXP or f_dot >= MAX_EXP: * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2893,7 +2897,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f_dot + 6.0) * 83.0))]); - /* "gensim/models/fasttext_inner.pyx":178 + /* "gensim/models/fasttext_inner.pyx":179 * continue * f = EXP_TABLE[((f_dot + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2902,7 +2906,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/fasttext_inner.pyx":180 + /* "gensim/models/fasttext_inner.pyx":181 * g = (label - f) * alpha * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2911,7 +2915,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":181 + /* "gensim/models/fasttext_inner.pyx":182 * * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2922,7 +2926,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_L10_continue:; } - /* "gensim/models/fasttext_inner.pyx":183 + /* "gensim/models/fasttext_inner.pyx":184 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2932,7 +2936,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":184 + /* "gensim/models/fasttext_inner.pyx":185 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # <<<<<<<<<<<<<< @@ -2941,7 +2945,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":183 + /* "gensim/models/fasttext_inner.pyx":184 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2950,7 +2954,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente */ } - /* "gensim/models/fasttext_inner.pyx":186 + /* "gensim/models/fasttext_inner.pyx":187 * sscal(&size, &inv_count, work, &ONE) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -2961,70 +2965,68 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14fasttext_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":188 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) */ __pyx_t_3 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_3) { - /* "gensim/models/fasttext_inner.pyx":188 + /* "gensim/models/fasttext_inner.pyx":189 * for m in range(j,k): * if m == i: * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): */ goto __pyx_L18_continue; - /* "gensim/models/fasttext_inner.pyx":187 + /* "gensim/models/fasttext_inner.pyx":188 * * for m in range(j,k): * if m == i: # <<<<<<<<<<<<<< * continue - * else: + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) */ } /* "gensim/models/fasttext_inner.pyx":190 + * if m == i: * continue - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * for d in range(subwords_idx_len[m]): - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * for d in range(subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) */ - /*else*/ { - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_vocab[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_vocab[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - /* "gensim/models/fasttext_inner.pyx":191 - * else: - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + /* "gensim/models/fasttext_inner.pyx":191 + * continue + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * */ - __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_d = __pyx_t_5; + __pyx_t_4 = (__pyx_v_subwords_idx_len[__pyx_v_m]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_d = __pyx_t_5; - /* "gensim/models/fasttext_inner.pyx":192 - * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) - * for d in range(subwords_idx_len[m]): - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< + /* "gensim/models/fasttext_inner.pyx":192 + * our_saxpy(&size, &word_locks_vocab[indexes[m]], work, &ONE, &syn0_vocab[indexes[m]*size], &ONE) + * for d in range(subwords_idx_len[m]): + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); - } + __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks_ngrams[((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d])])), __pyx_v_work, (&__pyx_v_6gensim_6models_14fasttext_inner_ONE), (&(__pyx_v_syn0_ngrams[(((__pyx_v_subwords_idx[__pyx_v_m])[__pyx_v_d]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_14fasttext_inner_ONE)); } __pyx_L18_continue:; } /* "gensim/models/fasttext_inner.pyx":194 - * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) + * our_saxpy(&size, &word_locks_ngrams[subwords_idx[m][d]], work, &ONE, &syn0_ngrams[subwords_idx[m][d]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -10040,6 +10042,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN + * */ __pyx_t_9 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 544, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); @@ -10068,6 +10071,7 @@ static int __pyx_pymod_exec_fasttext_inner(PyObject *__pyx_pyinit_module) * * FAST_VERSION = init() # initialize the module * MAX_WORDS_IN_BATCH = MAX_SENTENCE_LEN # <<<<<<<<<<<<<< + * */ if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAX_WORDS_IN_BATCH, __pyx_int_10000) < 0) __PYX_ERR(0, 545, __pyx_L1_error) diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index 5efae09433..09e75adba5 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -164,11 +164,12 @@ cdef unsigned long long fast_sentence_cbow_neg( if d == 0: target_index = word_index label = ONEF - target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 + else: + target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) + next_random = (next_random * 25214903917ULL + 11) & modulo + if target_index == word_index: + continue + label = 0.0 row2 = target_index * size f_dot = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) From 2560e1d6eaeb1b6e18fb29bd1a6e460a141c5f70 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 7 Dec 2017 00:44:20 +0530 Subject: [PATCH 18/22] fixes docstring format --- gensim/models/fasttext.py | 166 +++++++++++++++++++++++--------------- 1 file changed, 103 insertions(+), 63 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 5c9b55b23b..bf04da4f64 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -53,6 +53,24 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from fasttext_inner instead. + Parameters + ---------- + model : :class:`~gensim.models.fasttext.FastText` + instance of class `FastText`. + sentences : iterator or list + list or an iterable that streams the sentences directly from disk/network. + alpha : float + learning rate + work : `ndarray` + private working memory for each worker. + neu1 : `ndarray` + private working memory for each worker. + + Returns + ------- + int + effective number of words trained. + """ result = 0 for sentence in sentences: @@ -97,6 +115,24 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from fasttext_inner instead. + Parameters + ---------- + model : :class:`~gensim.models.fasttext.FastText` + instance of class `FastText`. + sentences : iterator or list + list or an iterable that streams the sentences directly from disk/network. + alpha : float + learning rate + work : `ndarray` + private working memory for each worker. + neu1 : `ndarray` + private working memory for each worker. + + Returns + ------- + int + effective number of words trained. + """ result = 0 for sentence in sentences: @@ -145,69 +181,72 @@ def __init__( If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. - `sg` defines the training algorithm. By default (`sg=0`), CBOW is used. - Otherwise (`sg=1`), skip-gram is employed. - - `size` is the dimensionality of the feature vectors. - - `window` is the maximum distance between the current and predicted word within a sentence. - - `alpha` is the initial learning rate (will linearly drop to `min_alpha` as training progresses). - - `seed` = for the random number generator. Initial vectors for each - word are seeded with a hash of the concatenation of word + str(seed). - Note that for a fully deterministically-reproducible run, you must also limit the model to - a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python - 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED - environment variable to control hash randomization.) - - `min_count` = ignore all words with total frequency lower than this. - - `max_vocab_size` = limit RAM during vocabulary building; if there are more unique - words than this, then prune the infrequent ones. Every 10 million word types - need about 1GB of RAM. Set to `None` for no limit (default). - - `sample` = threshold for configuring which higher-frequency words are randomly downsampled; + Parameters + ---------- + sg : int {1,0} + Defines the training algorithm. By default (`sg=0`), CBOW is used. + Otherwise (`sg=1`), skip-gram is employed. + size : int + Dimensionality of the feature vectors. + window : int + The maximum distance between the current and predicted word within a sentence. + alpha : float + The initial learning rate (will linearly drop to `min_alpha` as training progresses). + seed : int + Seed for the random number generator. Initial vectors for each + word are seeded with a hash of the concatenation of word + str(seed). + Note that for a fully deterministically-reproducible run, you must also limit the model to + a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python + 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED + environment variable to control hash randomization.) + min_count : int + Ignores all words with total frequency lower than this. + max_vocab_size : int + Limits the RAM during vocabulary building; if there are more unique + words than this, then prune the infrequent ones. Every 10 million word types + need about 1GB of RAM. Set to `None` for no limit (default). + sample : float + The threshold for configuring which higher-frequency words are randomly downsampled; default is 1e-3, useful range is (0, 1e-5). - - `workers` = use this many worker threads to train the model (=faster training with multicore machines). - - `hs` = if 1, hierarchical softmax will be used for model training. - If set to 0 (default), and `negative` is non-zero, negative sampling will be used. - - `negative` = if > 0, negative sampling will be used, the int for negative - specifies how many "noise words" should be drawn (usually between 5-20). - Default is 5. If set to 0, no negative samping is used. - - `cbow_mean` = if 0, use the sum of the context word vectors. If 1 (default), use the mean. - Only applies when cbow is used. - - `hashfxn` = hash function to use to randomly initialize weights, for increased - training reproducibility. Default is Python's rudimentary built in hash function. - - `iter` = number of iterations (epochs) over the corpus. Default is 5. - - `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - - `sorted_vocab` = if 1 (default), sort the vocabulary by descending frequency before - assigning word indexes. - - `batch_words` = target size (in words) for batches of examples passed to worker threads (and - thus cython routines). Default is 10000. (Larger batches will be passed if individual - texts are longer than 10000 words, but the standard cython code truncates to that maximum.) - - `min_n` = min length of char ngrams to be used for training word representations. Default is 3. - - `max_n` = max length of char ngrams to be used for training word representations. Set `max_n` to be - lesser than `min_n` to avoid char ngrams being used. Default is 6. - - `bucket` = Word and character ngram features are hashed into a fixed number of buckets, in order to limit the - memory usage of the model. This option specifies the number of buckets used by the model. Default: 2000000 + workers : int + Use these many worker threads to train the model (=faster training with multicore machines). + hs : int {1,0} + If 1, hierarchical softmax will be used for model training. + If set to 0 (default), and `negative` is non-zero, negative sampling will be used. + negative : int + If > 0, negative sampling will be used, the int for negative + specifies how many "noise words" should be drawn (usually between 5-20). + Default is 5. If set to 0, no negative samping is used. + cbow_mean : int {1,0} + If 0, use the sum of the context word vectors. If 1 (default), use the mean. + Only applies when cbow is used. + hashfxn : :func: + Hash function to use to randomly initialize weights, for increased + training reproducibility. Default is Python's rudimentary built in hash function. + iter : int + Number of iterations (epochs) over the corpus. Default is 5. + trim_rule : :func: + Vocabulary trimming rule, specifies whether certain words should remain + in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and + returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. + Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + of the model. + sorted_vocab : int {1,0} + If 1 (default), sort the vocabulary by descending frequency before + assigning word indexes. + batch_words : int + Target size (in words) for batches of examples passed to worker threads (and + thus cython routines). Default is 10000. (Larger batches will be passed if individual + texts are longer than 10000 words, but the standard cython code truncates to that maximum.) + min_n : int + Min length of char ngrams to be used for training word representations. Default is 3. + max_n : int + Max length of char ngrams to be used for training word representations. Set `max_n` to be + lesser than `min_n` to avoid char ngrams being used. Default is 6. + bucket : long + Word and character ngram features are hashed into a fixed number of buckets, in order to limit the + memory usage of the model. This option specifies the number of buckets used by the model. Default: 2000000 Example ------- @@ -334,7 +373,8 @@ def init_ngrams(self, update=False): def reset_ngram_weights(self): """Reset all projection weights to an initial (untrained) state, but keep the - existing vocabulary and their ngrams.""" + existing vocabulary and their ngrams. + """ rand_obj = np.random rand_obj.seed(self.seed) for index in range(len(self.wv.vocab)): From 7b8673a34e6157390926b0ebc3d36967fb73abe3 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 7 Dec 2017 14:10:44 +0530 Subject: [PATCH 19/22] adds missing docstrings --- gensim/models/fasttext.py | 191 ++++++++++++++++++++++++++++++++------ 1 file changed, 161 insertions(+), 30 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index bf04da4f64..c60d91c175 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -173,25 +173,26 @@ def __init__( """Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. - The `sentences` iterable can be simply a list, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in this module for such examples. - - If you don't supply `sentences`, the model is left uninitialized -- use if - you plan to initialize it in some other way. - Parameters ---------- + sentences : iterator or list + The `sentences` iterable can be simply a list, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + If you don't supply `sentences`, the model is left uninitialized -- use if + you plan to initialize it in some other way. sg : int {1,0} - Defines the training algorithm. By default (`sg=0`), CBOW is used. - Otherwise (`sg=1`), skip-gram is employed. + Defines the training algorithm. If 1, CBOW is used. + Otherwise, skip-gram is employed. size : int Dimensionality of the feature vectors. window : int The maximum distance between the current and predicted word within a sentence. alpha : float - The initial learning rate (will linearly drop to `min_alpha` as training progresses). + The initial learning rate. + min_alpha : float + Learning rate will linearly drop to `min_alpha` as training progresses. seed : int Seed for the random number generator. Initial vectors for each word are seeded with a hash of the concatenation of word + str(seed). @@ -204,27 +205,27 @@ def __init__( max_vocab_size : int Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types - need about 1GB of RAM. Set to `None` for no limit (default). + need about 1GB of RAM. Set to `None` for no limit. sample : float The threshold for configuring which higher-frequency words are randomly downsampled; - default is 1e-3, useful range is (0, 1e-5). + Useful range is (0, 1e-5). workers : int Use these many worker threads to train the model (=faster training with multicore machines). hs : int {1,0} If 1, hierarchical softmax will be used for model training. - If set to 0 (default), and `negative` is non-zero, negative sampling will be used. + If set to 0, and `negative` is non-zero, negative sampling will be used. negative : int If > 0, negative sampling will be used, the int for negative specifies how many "noise words" should be drawn (usually between 5-20). - Default is 5. If set to 0, no negative samping is used. + If set to 0, no negative samping is used. cbow_mean : int {1,0} - If 0, use the sum of the context word vectors. If 1 (default), use the mean. + If 0, use the sum of the context word vectors. If 1, use the mean. Only applies when cbow is used. hashfxn : :func: Hash function to use to randomly initialize weights, for increased training reproducibility. Default is Python's rudimentary built in hash function. iter : int - Number of iterations (epochs) over the corpus. Default is 5. + Number of iterations (epochs) over the corpus. trim_rule : :func: Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). @@ -233,22 +234,25 @@ def __init__( Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the model. sorted_vocab : int {1,0} - If 1 (default), sort the vocabulary by descending frequency before + If 1, sort the vocabulary by descending frequency before assigning word indexes. batch_words : int Target size (in words) for batches of examples passed to worker threads (and - thus cython routines). Default is 10000. (Larger batches will be passed if individual + thus cython routines).(Larger batches will be passed if individual texts are longer than 10000 words, but the standard cython code truncates to that maximum.) min_n : int - Min length of char ngrams to be used for training word representations. Default is 3. + Min length of char ngrams to be used for training word representations. max_n : int Max length of char ngrams to be used for training word representations. Set `max_n` to be - lesser than `min_n` to avoid char ngrams being used. Default is 6. + lesser than `min_n` to avoid char ngrams being used. + word_ngrams : int {1,0} + If 1, uses enriches word vectors with subword(ngrams) information. + If 0, this is equivalent to word2vec. bucket : long Word and character ngram features are hashed into a fixed number of buckets, in order to limit the - memory usage of the model. This option specifies the number of buckets used by the model. Default: 2000000 + memory usage of the model. This option specifies the number of buckets used by the model. - Example + examples ------- Initialize and train a `FastText` model: @@ -272,6 +276,8 @@ def __init__( trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words) def initialize_word_vectors(self): + """Initializes FastTextKeyedVectors instance to store all vocab/ngram vectors for the model. + """ self.wv = FastTextKeyedVectors() self.wv.min_n = self.min_n self.wv.max_n = self.max_n @@ -279,6 +285,38 @@ def initialize_word_vectors(self): def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): """Build vocabulary from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. + + Parameters + ---------- + sentences : iterator or list + The `sentences` iterable can be simply a list, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + keep_raw_vocab : bool + If not true, delete the raw vocabulary after the scaling is done and free up RAM. + trim_rule : :func: + Vocabulary trimming rule, specifies whether certain words should remain + in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and + returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. + Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part + of the model. + progress_per : int + Indicates how many words to process before showing/updateing the progress. + update: bool + If true, the new words in `sentences` will be added to model's vocab. + + Example + ------- + Train a model and update vocab for online training: + + >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) + >>> model.build_vocab(sentences1) + >>> model.train(sentences1, total_examples=model.corpus_count, epochs=model.iter) + >>> model.build_vocab(sentences2, update=True) + >>> model.train(sentences2, total_examples=model.corpus_count, epochs=model.iter) + """ if update: if not len(self.wv.vocab): @@ -294,10 +332,15 @@ def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_ self.init_ngrams(update=update) def init_ngrams(self, update=False): - """Computes ngrams of all words present in vocabulary and stores vectors for only those ngrams. + """Compute ngrams of all words present in vocabulary and stores vectors for only those ngrams. Vectors for other ngrams are initialized with a random uniform distribution in FastText. - If `update` is `True`, the new vocab words and their new ngrams word vectors are initialized - with random uniform distribution and updated/added to the exisiting vocab word and ngram vectors. + + Parameters + ---------- + update : bool + If True, the new vocab words and their new ngrams word vectors are initialized + with random uniform distribution and updated/added to the exisiting vocab word and ngram vectors. + """ if not update: self.wv.ngrams = {} @@ -389,6 +432,24 @@ def reset_ngram_weights(self): def _do_train_job(self, sentences, alpha, inits): """Train a single batch of sentences. Return 2-tuple `(effective word count after ignoring unknown words and sentence length trimming, total word count)`. + + Parameters + ---------- + sentences : iterator or list + The `sentences` iterable can be simply a list, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + alpha : float + The current learning rate. + inits : tuple + Each worker's private work memory. + + Returns + ------- + tuple + Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count) + """ work, neu1 = inits tally = 0 @@ -414,6 +475,38 @@ def train(self, sentences, total_examples=None, total_words=None, To avoid common mistakes around the model's ability to do multiple training passes itself, an explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` is only called once, the model's cached `iter` value should be supplied as `epochs` value. + + Parameters + ---------- + sentences : iterator or list + The `sentences` iterable can be simply a list, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + total_examples : int + Count of sentences. + total_words : int + Count of raw words in sentences. + epochs : int + Number of iterations (epochs) over the corpus. + start_alpha : float + Initial learning rate. + end_alpha : float + Final learning rate. Drops linearly from `start_alpha`. + word_count : int + Count of words already trained. Set this to 0 for the usual + case of training on all words in sentences. + queue_factor : int + Multiplier for size of queue (number of workers * queue_factor). + report_delay : float + Seconds to wait before reporting progress. + + Example + ------- + >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) + >>> model.build_vocab(sentences) + >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) + """ self.neg_labels = [] if self.negative > 0: @@ -429,6 +522,8 @@ def __getitem__(self, word): return self.word_vec(word) def get_vocab_word_vecs(self): + """Calculate vectors for words in vocabulary and stores them in `wv.syn0`. + """ for w, v in self.wv.vocab.items(): word_vec = np.copy(self.wv.syn0_vocab[v.index]) ngrams = self.wv.ngrams_word[w] @@ -439,11 +534,24 @@ def get_vocab_word_vecs(self): self.wv.syn0[v.index] = word_vec def word_vec(self, word, use_norm=False): - """Accept a single word as input. - Returns the word's representations in vector space, as a 1D numpy array. + """Return the word's representations in vector space, as a 1D numpy array. + + Parameters + ---------- + words : str + A single word whose vector needs to be returned. + use_norm : bool + If True, returns normalized vector. - The word can be out-of-vocabulary as long as ngrams for the word are present. - For words with all ngrams absent, a KeyError is raised. + Returns + ------- + `ndarray` + The word's representations in vector space, as a 1D numpy array. + + Raises + ------ + KeyError + For words with all ngrams absent, a KeyError is raised. Example ------- @@ -456,8 +564,31 @@ def word_vec(self, word, use_norm=False): @classmethod def load_fasttext_format(cls, *args, **kwargs): + """Load a :class:`~gensim.models.fasttext.FastText` model from a format compatible with + the original fasttext implementation. + + Parameters + ---------- + fname : str + path to the file + + """ return Ft_Wrapper.load_fasttext_format(*args, **kwargs) def save(self, *args, **kwargs): + """Save the model. This saved model can be loaded again using :func:`~gensim.models.fasttext.FastText.load`, + which supports online training and getting vectors for out-of-vocabulary words. + + Parameters + ---------- + fname : str + path to the file + + Example + ------- + >>> model_gensim.save('saved_model_gensim') + >>> loaded_model = FastText.load('saved_model_gensim') + + """ kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) super(FastText, self).save(*args, **kwargs) From 144fab9ef5ce2eef7a87679423af78dff46e5fd3 Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 7 Dec 2017 18:46:19 +0500 Subject: [PATCH 20/22] add missing __getitem__ to rst --- docs/src/models/fasttext.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/models/fasttext.rst b/docs/src/models/fasttext.rst index 13a8d86a2c..e65b43fd25 100644 --- a/docs/src/models/fasttext.rst +++ b/docs/src/models/fasttext.rst @@ -5,5 +5,6 @@ :synopsis: FastText model :members: :inherited-members: + :special-members: __getitem__ :undoc-members: :show-inheritance: From c3f5d741dc9e6aaae6642cceca9abc7c63994154 Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 7 Dec 2017 18:47:12 +0500 Subject: [PATCH 21/22] add missing import to __init__ (`from gensim.models import FastText` instead of `from gensim.models.fasttext ...`) --- gensim/models/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index 530f7c4980..c25e8e7795 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -19,6 +19,7 @@ from .normmodel import NormModel # noqa:F401 from .atmodel import AuthorTopicModel # noqa:F401 from .ldaseqmodel import LdaSeqModel # noqa:F401 +from .fasttext import FastText # noqa:F401 from . import wrappers # noqa:F401 From 327f4ca6d330c514205f6315a9b21a14fb22b7a9 Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 7 Dec 2017 18:50:16 +0500 Subject: [PATCH 22/22] fix docs --- gensim/models/fasttext.py | 281 +++++++++++++++++++++----------------- 1 file changed, 159 insertions(+), 122 deletions(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index c60d91c175..c8be055a24 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -6,19 +6,23 @@ """Learn word representations via fasttext's "skip-gram and CBOW models", using either hierarchical softmax or negative sampling [1]_. -NOTE: There are more ways to get word vectors in Gensim than just FastText. +Notes +----- +There are more ways to get word vectors in Gensim than just FastText. See wrappers for VarEmbed and WordRank or Word2Vec This module allows training a word embedding from a training corpus with the additional ability to obtain word vectors for out-of-vocabulary words. -For a tutorial on gensim's native fasttext, refer to the noteboook -- -https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb +For a tutorial on gensim's native fasttext, refer to the noteboook -- [2]_ **Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** .. [1] P. Bojanowski, E. Grave, A. Joulin, T. Mikolov Enriching Word Vectors with Subword Information. In arXiv preprint arXiv:1607.04606. + https://arxiv.org/abs/1607.04606 + +.. [2] https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb """ @@ -48,7 +52,7 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): """Update CBOW model by training on a sequence of sentences. Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `FastText.train()`. + vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from fasttext_inner instead. @@ -56,20 +60,20 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): Parameters ---------- model : :class:`~gensim.models.fasttext.FastText` - instance of class `FastText`. - sentences : iterator or list - list or an iterable that streams the sentences directly from disk/network. + `FastText` instance. + sentences : iterable of iterables + Iterable of the sentences directly from disk/network. alpha : float - learning rate - work : `ndarray` - private working memory for each worker. - neu1 : `ndarray` - private working memory for each worker. + Learning rate. + work : :class:`numpy.ndarray` + Private working memory for each worker. + neu1 : :class:`numpy.ndarray` + Private working memory for each worker. Returns ------- int - effective number of words trained. + Effective number of words trained. """ result = 0 @@ -110,7 +114,7 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): """Update skip-gram model by training on a sequence of sentences. Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `FastText.train()`. + vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from fasttext_inner instead. @@ -118,20 +122,20 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): Parameters ---------- model : :class:`~gensim.models.fasttext.FastText` - instance of class `FastText`. - sentences : iterator or list - list or an iterable that streams the sentences directly from disk/network. + `FastText` instance. + sentences : iterable of iterables + Iterable of the sentences directly from disk/network. alpha : float - learning rate - work : `ndarray` - private working memory for each worker. - neu1 : `ndarray` - private working memory for each worker. + Learning rate. + work : :class:`numpy.ndarray` + Private working memory for each worker. + neu1 : :class:`numpy.ndarray` + Private working memory for each worker. Returns ------- int - effective number of words trained. + Effective number of words trained. """ result = 0 @@ -158,11 +162,12 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): class FastText(Word2Vec): - """Class for training, using and evaluating word representations learned using method described in - `Enriching Word Vectors with Subword Information` aka Fasttext. + """Class for training, using and evaluating word representations learned using method + described in [1]_ aka Fasttext. - The model can be stored/loaded via its `save()` and `load()` methods, or loaded in a format - compatible with the original fasttext implementation via `load_fasttext_format()`. + The model can be stored/loaded via its :meth:`~gensim.models.fasttext.FastText.save()` and + :meth:`~gensim.models.fasttext.FastText.load()` methods, or loaded in a format compatible with the original + fasttext implementation via :meth:`~gensim.models.fasttext.FastText.load_fasttext_format()`. """ def __init__( @@ -175,16 +180,15 @@ def __init__( Parameters ---------- - sentences : iterator or list - The `sentences` iterable can be simply a list, but for larger corpora, + sentences : iterable of iterables + The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. - If you don't supply `sentences`, the model is left uninitialized -- use if - you plan to initialize it in some other way. - sg : int {1,0} - Defines the training algorithm. If 1, CBOW is used. - Otherwise, skip-gram is employed. + or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. + If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it + in some other way. + sg : int {1, 0} + Defines the training algorithm. If 1, CBOW is used, otherwise, skip-gram is employed. size : int Dimensionality of the feature vectors. window : int @@ -194,48 +198,45 @@ def __init__( min_alpha : float Learning rate will linearly drop to `min_alpha` as training progresses. seed : int - Seed for the random number generator. Initial vectors for each - word are seeded with a hash of the concatenation of word + str(seed). - Note that for a fully deterministically-reproducible run, you must also limit the model to - a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python - 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED - environment variable to control hash randomization.) + Seed for the random number generator. Initial vectors for each word are seeded with a hash of + the concatenation of word + `str(seed)`. Note that for a fully deterministically-reproducible run, + you must also limit the model to a single worker thread (`workers=1`), to eliminate ordering jitter + from OS thread scheduling. (In Python 3, reproducibility between interpreter launches also requires + use of the `PYTHONHASHSEED` environment variable to control hash randomization). min_count : int Ignores all words with total frequency lower than this. max_vocab_size : int Limits the RAM during vocabulary building; if there are more unique - words than this, then prune the infrequent ones. Every 10 million word types - need about 1GB of RAM. Set to `None` for no limit. + words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. + Set to `None` for no limit. sample : float - The threshold for configuring which higher-frequency words are randomly downsampled; - Useful range is (0, 1e-5). + The threshold for configuring which higher-frequency words are randomly downsampled, + useful range is (0, 1e-5). workers : int Use these many worker threads to train the model (=faster training with multicore machines). hs : int {1,0} If 1, hierarchical softmax will be used for model training. If set to 0, and `negative` is non-zero, negative sampling will be used. negative : int - If > 0, negative sampling will be used, the int for negative - specifies how many "noise words" should be drawn (usually between 5-20). - If set to 0, no negative samping is used. + If > 0, negative sampling will be used, the int for negative specifies how many "noise words" + should be drawn (usually between 5-20). + If set to 0, no negative sampling is used. cbow_mean : int {1,0} - If 0, use the sum of the context word vectors. If 1, use the mean. - Only applies when cbow is used. - hashfxn : :func: - Hash function to use to randomly initialize weights, for increased - training reproducibility. Default is Python's rudimentary built in hash function. + If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. + hashfxn : function + Hash function to use to randomly initialize weights, for increased training reproducibility. iter : int Number of iterations (epochs) over the corpus. - trim_rule : :func: - Vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. + trim_rule : function + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the model. sorted_vocab : int {1,0} - If 1, sort the vocabulary by descending frequency before - assigning word indexes. + If 1, sort the vocabulary by descending frequency before assigning word indexes. batch_words : int Target size (in words) for batches of examples passed to worker threads (and thus cython routines).(Larger batches will be passed if individual @@ -248,17 +249,21 @@ def __init__( word_ngrams : int {1,0} If 1, uses enriches word vectors with subword(ngrams) information. If 0, this is equivalent to word2vec. - bucket : long - Word and character ngram features are hashed into a fixed number of buckets, in order to limit the + bucket : int + Character ngrams are hashed into a fixed number of buckets, in order to limit the memory usage of the model. This option specifies the number of buckets used by the model. - examples - ------- - Initialize and train a `FastText` model: + Examples + -------- + Initialize and train a `FastText` model + + >>> from gensim.models import FastText + >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] + >>> + >>> model = FastText(sentences, min_count=1) + >>> say_vector = model['say'] # get vector for word + >>> of_vector = model['of'] # get vector for out-of-vocab word - >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) - >>> model.build_vocab(sentences) - >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) """ # fastText specific params @@ -276,8 +281,7 @@ def __init__( trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words) def initialize_word_vectors(self): - """Initializes FastTextKeyedVectors instance to store all vocab/ngram vectors for the model. - """ + """Initializes FastTextKeyedVectors instance to store all vocab/ngram vectors for the model.""" self.wv = FastTextKeyedVectors() self.wv.min_n = self.min_n self.wv.max_n = self.max_n @@ -288,34 +292,39 @@ def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_ Parameters ---------- - sentences : iterator or list - The `sentences` iterable can be simply a list, but for larger corpora, + sentences : iterable of iterables + The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. keep_raw_vocab : bool If not true, delete the raw vocabulary after the scaling is done and free up RAM. - trim_rule : :func: - Vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. + trim_rule : function + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the model. progress_per : int - Indicates how many words to process before showing/updateing the progress. + Indicates how many words to process before showing/updating the progress. update: bool If true, the new words in `sentences` will be added to model's vocab. Example ------- - Train a model and update vocab for online training: - - >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) - >>> model.build_vocab(sentences1) - >>> model.train(sentences1, total_examples=model.corpus_count, epochs=model.iter) - >>> model.build_vocab(sentences2, update=True) - >>> model.train(sentences2, total_examples=model.corpus_count, epochs=model.iter) + Train a model and update vocab for online training + + >>> from gensim.models import FastText + >>> sentences_1 = [["cat", "say", "meow"], ["dog", "say", "woof"]] + >>> sentences_2 = [["dude", "say", "wazzup!"]] + >>> + >>> model = FastText(min_count=1) + >>> model.build_vocab(sentences_1) + >>> model.train(sentences_1, total_examples=model.corpus_count, epochs=model.iter) + >>> model.build_vocab(sentences_2, update=True) + >>> model.train(sentences_2, total_examples=model.corpus_count, epochs=model.iter) """ if update: @@ -339,7 +348,7 @@ def init_ngrams(self, update=False): ---------- update : bool If True, the new vocab words and their new ngrams word vectors are initialized - with random uniform distribution and updated/added to the exisiting vocab word and ngram vectors. + with random uniform distribution and updated/added to the existing vocab word and ngram vectors. """ if not update: @@ -415,8 +424,9 @@ def init_ngrams(self, update=False): self.syn0_ngrams_lockf = vstack([self.syn0_ngrams_lockf, new_ngram_lockf_rows]) def reset_ngram_weights(self): - """Reset all projection weights to an initial (untrained) state, but keep the - existing vocabulary and their ngrams. + """Reset all projection weights to an initial (untrained) state, + but keep the existing vocabulary and their ngrams. + """ rand_obj = np.random rand_obj.seed(self.seed) @@ -435,19 +445,19 @@ def _do_train_job(self, sentences, alpha, inits): Parameters ---------- - sentences : iterator or list - The `sentences` iterable can be simply a list, but for larger corpora, + sentences : iterable of iterables + The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. alpha : float The current learning rate. - inits : tuple + inits : (:class:`numpy.ndarray`, :class:`numpy.ndarray`) Each worker's private work memory. Returns ------- - tuple + (int, int) Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count) """ @@ -467,22 +477,23 @@ def train(self, sentences, total_examples=None, total_words=None, For FastText, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate - progres-percentage logging, either total_examples (count of sentences) or total_words (count of - raw words in sentences) MUST be provided. (If the corpus is the same as was provided to - `build_vocab()`, the count of examples in that corpus will be available in the model's - `corpus_count` property.) + progress-percentage logging, either total_examples (count of sentences) or total_words (count of + raw words in sentences) **MUST** be provided (if the corpus is the same as was provided to + :meth:`~gensim.models.fasttext.FastText.build_vocab()`, the count of examples in that corpus + will be available in the model's :attr:`corpus_count` property). To avoid common mistakes around the model's ability to do multiple training passes itself, an - explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` - is only called once, the model's cached `iter` value should be supplied as `epochs` value. + explicit `epochs` argument **MUST** be provided. In the common and recommended case, + where :meth:`~gensim.models.fasttext.FastText.train()` is only called once, + the model's cached `iter` value should be supplied as `epochs` value. Parameters ---------- - sentences : iterator or list - The `sentences` iterable can be simply a list, but for larger corpora, + sentences : iterable of iterables + The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in `~gensim.models.word2vec` module for such examples. + or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. total_examples : int Count of sentences. total_words : int @@ -501,9 +512,12 @@ def train(self, sentences, total_examples=None, total_words=None, report_delay : float Seconds to wait before reporting progress. - Example - ------- - >>> model = FastText(sg=1, hs=0,window=2, negative=5, iter=4) + Examples + -------- + >>> from gensim.models import FastText + >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] + >>> + >>> model = FastText(min_count=1) >>> model.build_vocab(sentences) >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) @@ -514,16 +528,42 @@ def train(self, sentences, total_examples=None, total_words=None, self.neg_labels = zeros(self.negative + 1) self.neg_labels[0] = 1. - Word2Vec.train(self, sentences, total_examples=self.corpus_count, epochs=self.iter, + Word2Vec.train( + self, sentences, total_examples=self.corpus_count, epochs=self.iter, start_alpha=self.alpha, end_alpha=self.min_alpha) self.get_vocab_word_vecs() def __getitem__(self, word): + """Get `word` representations in vector space, as a 1D numpy array. + + Parameters + ---------- + word : str + A single word whose vector needs to be returned. + + Returns + ------- + :class:`numpy.ndarray` + The word's representations in vector space, as a 1D numpy array. + + Raises + ------ + KeyError + For words with all ngrams absent, a KeyError is raised. + + Example + ------- + >>> from gensim.models import FastText + >>> from gensim.test.utils import datapath + >>> + >>> trained_model = FastText.load_fasttext_format(datapath('lee_fasttext')) + >>> meow_vector = trained_model['hello'] # get vector for word + + """ return self.word_vec(word) def get_vocab_word_vecs(self): - """Calculate vectors for words in vocabulary and stores them in `wv.syn0`. - """ + """Calculate vectors for words in vocabulary and stores them in `wv.syn0`.""" for w, v in self.wv.vocab.items(): word_vec = np.copy(self.wv.syn0_vocab[v.index]) ngrams = self.wv.ngrams_word[w] @@ -534,18 +574,18 @@ def get_vocab_word_vecs(self): self.wv.syn0[v.index] = word_vec def word_vec(self, word, use_norm=False): - """Return the word's representations in vector space, as a 1D numpy array. + """Get the word's representations in vector space, as a 1D numpy array. Parameters ---------- - words : str + word : str A single word whose vector needs to be returned. use_norm : bool If True, returns normalized vector. Returns ------- - `ndarray` + :class:`numpy.ndarray` The word's representations in vector space, as a 1D numpy array. Raises @@ -555,9 +595,11 @@ def word_vec(self, word, use_norm=False): Example ------- - - >>> trained_model['office'] - array([ -1.40128313e-02, ...]) + >>> from gensim.models import FastText + >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] + >>> + >>> model = FastText(sentences, min_count=1) + >>> meow_vector = model.word_vec('meow') # get vector for word """ return FastTextKeyedVectors.word_vec(self.wv, word, use_norm=use_norm) @@ -570,7 +612,7 @@ def load_fasttext_format(cls, *args, **kwargs): Parameters ---------- fname : str - path to the file + Path to the file. """ return Ft_Wrapper.load_fasttext_format(*args, **kwargs) @@ -582,12 +624,7 @@ def save(self, *args, **kwargs): Parameters ---------- fname : str - path to the file - - Example - ------- - >>> model_gensim.save('saved_model_gensim') - >>> loaded_model = FastText.load('saved_model_gensim') + Path to the file. """ kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm'])