Skip to content

Commit

Permalink
delete .gitattributes (#2836)
Browse files Browse the repository at this point in the history
* delete .gitattributes

* disable certain tests on Azure pipelines

* tweak env var behavior

* disable one more test

* make the newest version of flake8 happy

* patch tox.ini to pin flake8 and flake8-rst versions

Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
gojomo and mpenkov authored May 14, 2020
1 parent 69732eb commit 2360459
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 27 deletions.
1 change: 0 additions & 1 deletion .gitattributes

This file was deleted.

2 changes: 1 addition & 1 deletion gensim/corpora/sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop):
if (result_stop - result_start) != (stop - start):
raise ValueError(
'Result start/stop range different than stop/start range (%d - %d vs. %d - %d)'
.format(result_start, result_stop, start, stop)
% (result_start, result_stop, start, stop)
)

# Dense data: just copy using numpy's slice notation
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/poincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,7 @@ def __iter__(self):
if sys.version_info[0] < 3:
lines = file_obj
else:
lines = (l.decode(self.encoding) for l in file_obj)
lines = (line.decode(self.encoding) for line in file_obj)
# csv.reader requires bytestring input in python2, unicode input in python3
reader = csv.reader(lines, delimiter=self.delimiter)
for row in reader:
Expand Down
13 changes: 13 additions & 0 deletions gensim/test/test_corpora.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import codecs
import itertools
import logging
import os
import os.path
import tempfile
import unittest
Expand All @@ -26,6 +27,9 @@
from gensim.test.utils import datapath, get_tmpfile, common_corpus


AZURE = bool(os.environ.get('PIPELINE_WORKSPACE'))


class DummyTransformer(object):
def __getitem__(self, bow):
if len(next(iter(bow))) == 2:
Expand Down Expand Up @@ -58,6 +62,7 @@ def tearDown(self):
except OSError:
pass

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_load(self):
fname = datapath('testcorpus.' + self.file_extension.lstrip('.'))
corpus = self.corpus_class(fname)
Expand All @@ -66,6 +71,7 @@ def test_load(self):
# the deerwester corpus always has nine documents
self.assertEqual(len(docs), 9)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_len(self):
fname = datapath('testcorpus.' + self.file_extension.lstrip('.'))
corpus = self.corpus_class(fname)
Expand All @@ -81,6 +87,7 @@ def test_len(self):

self.assertEqual(len(corpus), 9)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_empty_input(self):
tmpf = get_tmpfile('gensim_corpus.tst')
with open(tmpf, 'w') as f:
Expand All @@ -95,6 +102,7 @@ def test_empty_input(self):
docs = list(corpus)
self.assertEqual(len(docs), 0)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_save(self):
corpus = self.TEST_CORPUS
tmpf = get_tmpfile('gensim_corpus.tst')
Expand All @@ -106,6 +114,7 @@ def test_save(self):
corpus2 = list(self.corpus_class(tmpf))
self.assertEqual(corpus, corpus2)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_serialize(self):
corpus = self.TEST_CORPUS
tmpf = get_tmpfile('gensim_corpus.tst')
Expand All @@ -127,6 +136,7 @@ def test_serialize(self):
idx = [1, 3, 5, 7]
self.assertEqual(corpus[idx], corpus2[idx])

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_serialize_compressed(self):
corpus = self.TEST_CORPUS
tmpf = get_tmpfile('gensim_corpus.tst')
Expand All @@ -144,6 +154,7 @@ def test_serialize_compressed(self):
for i in range(len(corpus)):
self.assertEqual(corpus[i], corpus2[i])

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_switch_id2word(self):
fname = datapath('testcorpus.' + self.file_extension.lstrip('.'))
corpus = self.corpus_class(fname)
Expand All @@ -161,6 +172,7 @@ def test_switch_id2word(self):
testdoc2 = set((to_unicode(corpus.id2word[x]), y) for x, y in firstdoc2)
self.assertEqual(testdoc2, {('computer', 1), ('human', 1), ('interface', 1)})

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_indexing(self):
fname = datapath('testcorpus.' + self.file_extension.lstrip('.'))
corpus = self.corpus_class(fname)
Expand Down Expand Up @@ -233,6 +245,7 @@ def test_closed_file_object(self):
self.assertEqual(f, 0)
self.assertEqual(s, 0)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def test_load(self):
self.assertEqual(self.corpus.num_docs, 9)
self.assertEqual(self.corpus.num_terms, 12)
Expand Down
8 changes: 4 additions & 4 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,17 +666,17 @@ def test_word_vec_non_writeable(self):
vector *= 0

@log_capture()
def testBuildVocabWarning(self, l):
def testBuildVocabWarning(self, line):
"""Test if logger warning is raised on non-ideal input to a doc2vec model"""
raw_sentences = ['human', 'machine']
sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(raw_sentences)]
model = doc2vec.Doc2Vec()
model.build_vocab(sentences)
warning = "Each 'words' should be a list of words (usually unicode strings)."
self.assertTrue(warning in str(l))
self.assertTrue(warning in str(line))

@log_capture()
def testTrainWarning(self, l):
def testTrainWarning(self, line):
"""Test if warning is raised if alpha rises during subsequent calls to train()"""
raw_sentences = [['human'],
['graph', 'trees']]
Expand All @@ -690,7 +690,7 @@ def testTrainWarning(self, l):
if epoch == 5:
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))
self.assertTrue(warning in str(line))

def testLoadOnClassError(self):
"""Test if exception is raised when loading doc2vec model on instance"""
Expand Down
12 changes: 6 additions & 6 deletions gensim/test/test_fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,12 +703,12 @@ def test_online_learning_after_save_fromfile(self):

def online_sanity(self, model):
terro, others = [], []
for l in list_corpus:
if 'terrorism' in l:
terro.append(l)
for x in list_corpus:
if 'terrorism' in x:
terro.append(x)
else:
others.append(l)
self.assertTrue(all('terrorism' not in l for l in others))
others.append(x)
self.assertTrue(all('terrorism' not in x for x in others))
model.build_vocab(others)
model.train(others, total_examples=model.corpus_count, epochs=model.epochs)
# checks that `vectors` is different from `vectors_vocab`
Expand Down Expand Up @@ -1468,7 +1468,7 @@ def line_to_array(line):
stdout=subprocess.PIPE)
words_str = '\n'.join(words)
out, _ = process.communicate(input=words_str.encode("utf-8"))
return np.array([line_to_array(l) for l in out.splitlines()], dtype=np.float32)
return np.array([line_to_array(line) for line in out.splitlines()], dtype=np.float32)


@unittest.skipIf(not os.environ.get("FT_HOME", None), "FT_HOME env variable not set, skipping test")
Expand Down
6 changes: 5 additions & 1 deletion gensim/test/test_ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@


import logging
import unittest
import numbers
import os
import unittest

import six
import numpy as np
Expand All @@ -23,6 +24,8 @@
from gensim.test import basetmtests
from gensim.test.utils import datapath, get_tmpfile, common_texts

AZURE = bool(os.environ.get('PIPELINE_WORKSPACE'))

dictionary = Dictionary(common_texts)
corpus = [dictionary.doc2bow(text) for text in common_texts]

Expand Down Expand Up @@ -210,6 +213,7 @@ def testGetTopicTerms(self):
self.assertTrue(isinstance(k, numbers.Integral))
self.assertTrue(np.issubdtype(v, np.floating))

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def testGetDocumentTopics(self):

model = self.class_(
Expand Down
2 changes: 1 addition & 1 deletion gensim/test/test_ldavowpalwabbit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_corpus():
dict_path = datapath('ldavowpalwabbit.dict.txt')
dictionary = Dictionary.load_from_text(dict_path)
with open(text_path) as fhandle:
corpus = [dictionary.doc2bow(l.strip().split()) for l in fhandle]
corpus = [dictionary.doc2bow(line.strip().split()) for line in fhandle]
return corpus, dictionary


Expand Down
4 changes: 4 additions & 0 deletions gensim/test/test_sklearn_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import unittest
import numpy
import codecs
Expand Down Expand Up @@ -27,6 +28,8 @@
from gensim import matutils, models
from gensim.test.utils import datapath, common_texts

AZURE = bool(os.environ.get('PIPELINE_WORKSPACE'))

texts = [
['complier', 'system', 'computer'],
['eulerian', 'node', 'cycle', 'graph', 'tree', 'path'],
Expand Down Expand Up @@ -1046,6 +1049,7 @@ def setUp(self):
self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
self.model.fit(self.corpus)

@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
def testTransform(self):
# transform one document
doc = self.corpus[0]
Expand Down
10 changes: 8 additions & 2 deletions gensim/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,10 @@ def test_flatten_not_nested(self):
class TestSaveAsLineSentence(unittest.TestCase):
def test_save_as_line_sentence_en(self):
corpus_file = get_tmpfile('gensim_utils.tst')
ref_sentences = [l.split() for l in utils.any2unicode('hello world\nhow are you').split('\n')]
ref_sentences = [
line.split()
for line in utils.any2unicode('hello world\nhow are you').split('\n')
]

utils.save_as_line_sentence(ref_sentences, corpus_file)

Expand All @@ -254,7 +257,10 @@ def test_save_as_line_sentence_en(self):

def test_save_as_line_sentence_ru(self):
corpus_file = get_tmpfile('gensim_utils.tst')
ref_sentences = [l.split() for l in utils.any2unicode('привет мир\nкак ты поживаешь').split('\n')]
ref_sentences = [
line.split()
for line in utils.any2unicode('привет мир\nкак ты поживаешь').split('\n')
]
utils.save_as_line_sentence(ref_sentences, corpus_file)

with utils.open(corpus_file, 'rb', encoding='utf8') as fin:
Expand Down
4 changes: 4 additions & 0 deletions gensim/test/test_varembed_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"""

import logging
import os
import sys

import numpy as np
Expand All @@ -29,7 +30,10 @@
varembed_model_vector_file = datapath('varembed_vectors.pkl')
varembed_model_morfessor_file = datapath('varembed_morfessor.bin')

AZURE = bool(os.environ.get('PIPELINE_WORKSPACE'))


@unittest.skipIf(AZURE, 'see <https://github.com/RaRe-Technologies/gensim/pull/2836>')
class TestVarembed(unittest.TestCase):
def testLoadVarembedFormat(self):
"""Test storing/loading the entire model."""
Expand Down
18 changes: 9 additions & 9 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,12 @@ def testOnlineLearningAfterSaveFromFile(self):

def onlineSanity(self, model, trained_model=False):
terro, others = [], []
for l in list_corpus:
if 'terrorism' in l:
terro.append(l)
for x in list_corpus:
if 'terrorism' in x:
terro.append(x)
else:
others.append(l)
self.assertTrue(all('terrorism' not in l for l in others))
others.append(x)
self.assertTrue(all('terrorism' not in x for x in others))
model.build_vocab(others, update=trained_model)
model.train(others, total_examples=model.corpus_count, epochs=model.epochs)
self.assertFalse('terrorism' in model.wv.vocab)
Expand Down Expand Up @@ -952,16 +952,16 @@ def testLoadOldModel(self):
loaded_model.train(list_corpus, total_examples=model.corpus_count, epochs=model.epochs)

@log_capture()
def testBuildVocabWarning(self, l):
def testBuildVocabWarning(self, line):
"""Test if warning is raised on non-ideal input to a word2vec model"""
sentences = ['human', 'machine']
model = word2vec.Word2Vec()
model.build_vocab(sentences)
warning = "Each 'sentences' item should be a list of words (usually unicode strings)."
self.assertTrue(warning in str(l))
self.assertTrue(warning in str(line))

@log_capture()
def testTrainWarning(self, l):
def testTrainWarning(self, line):
"""Test if warning is raised if alpha rises during subsequent calls to train()"""
sentences = [
['human'],
Expand All @@ -976,7 +976,7 @@ def testTrainWarning(self, l):
if epoch == 5:
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))
self.assertTrue(warning in str(line))

def test_train_with_explicit_param(self):
model = word2vec.Word2Vec(size=2, min_count=1, hs=1, negative=0)
Expand Down
5 changes: 4 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ setenv =
MALLET_HOME={env:MALLET_HOME:}
SKIP_NETWORK_TESTS={env:SKIP_NETWORK_TESTS:}
BOTO_CONFIG={env:BOTO_CONFIG:}
PIPELINE_WORKSPACE={env:PIPELINE_WORKSPACE:}
PYTHONHASHSEED=1
TOX_PARALLEL_NO_SPINNER=1

Expand All @@ -55,7 +56,9 @@ commands = flake8 gensim/ {posargs}

[testenv:flake8-docs]
recreate = True
deps = flake8-rst==0.4.3
deps =
flake8-rst==0.4.3
flake8==3.7.9

commands = flake8-rst gensim/ docs/ {posargs}

Expand Down

0 comments on commit 2360459

Please sign in to comment.