Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Throw exception if load() is called on instance rather than the class #889

Merged
merged 42 commits into from
Dec 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
25fc004
Update word2vec.py
aayux Sep 26, 2016
58976d8
Update word2vec.py
aayux Sep 26, 2016
cf34e2d
Update word2vec.py
aayux Sep 26, 2016
773a75e
Update doc2vec.py
aayux Sep 26, 2016
96d65ec
Update doc2vec.py
aayux Sep 29, 2016
d91b99d
Update word2vec.py
aayux Sep 29, 2016
c156c79
Update utils.py
aayux Oct 24, 2016
60d42a3
Update word2vec.py
aayux Oct 24, 2016
cc7e3e1
Update utils.py
aayux Oct 24, 2016
a0ea2b4
Update word2vec.py
aayux Oct 24, 2016
2d85ef5
Update doc2vec.py
aayux Oct 24, 2016
ac0b0f3
Merge pull request #1 from dust0x/Issue-889
aayux Oct 24, 2016
770a277
Update doc2vec.py
aayux Oct 26, 2016
772cdcd
Update word2vec.py
aayux Oct 26, 2016
4c4ec8c
Update utils.py
aayux Oct 26, 2016
7173993
Update word2vec.py
aayux Oct 26, 2016
bb7b9e9
Update doc2vec.py
aayux Oct 26, 2016
23461e3
Merge branch 'develop' of https://github.com/RaRe-Technologies/gensim…
aayux Oct 26, 2016
fb5bd0b
Update doc2vec.py
aayux Oct 26, 2016
6ef3599
Update utils.py
aayux Oct 31, 2016
075d925
modified: gensim/utils.py
aayux Oct 31, 2016
bb53ccf
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
e39b8d6
modified: gensim/test/test_word2vec.py
aayux Oct 31, 2016
cec0a1b
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
c09e4d8
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
ea89bd5
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
5d743f1
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
d045bd2
modified: gensim/test/test_doc2vec.py
aayux Oct 31, 2016
07b3700
Merge https://github.com/RaRe-Technologies/gensim into develop
aayux Dec 4, 2016
40f2a50
Update test_word2vec.py
aayux Dec 4, 2016
43c7b08
Update test_doc2vec.py
aayux Dec 4, 2016
b50d60a
Update test_doc2vec.py
aayux Dec 4, 2016
eba33bc
Update test_word2vec.py
aayux Dec 4, 2016
6ffacdf
Update doc2vec.py
aayux Dec 4, 2016
f0137b8
Update doc2vec.py
aayux Dec 4, 2016
3bf25fc
Update test_word2vec.py
aayux Dec 4, 2016
7519519
Update test_word2vec.py
aayux Dec 4, 2016
2d28b03
Update test_word2vec.py
aayux Dec 4, 2016
19d2035
Update test_doc2vec.py
aayux Dec 4, 2016
e68bb13
Update test_word2vec.py
aayux Dec 4, 2016
37c9149
Merge pull request #2 from dust0x/tests
aayux Dec 4, 2016
d9b45c3
Merge branch 'develop' into develop
tmylk Dec 22, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap, \
sqrt, newaxis, ndarray, dot, vstack, dtype, divide as np_divide


from gensim.utils import call_on_class_only
from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc
from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_batch_sg
from six.moves import xrange, zip
Expand Down Expand Up @@ -603,10 +605,13 @@ def __init__(self, documents=None, dm_mean=None,
super(Doc2Vec, self).__init__(
sg=(1 + dm) % 2,
null_word=dm_concat, **kwargs)

self.load = call_on_class_only
self.load_word2vec_format = call_on_class_only

if dm_mean is not None:
self.cbow_mean = dm_mean

self.dbow_words = dbow_words
self.dm_concat = dm_concat
self.dm_tag_count = dm_tag_count
Expand Down
4 changes: 4 additions & 0 deletions gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import threading
import itertools

from gensim.utils import keep_vocab_item, call_on_class_only
from gensim.utils import keep_vocab_item
from gensim.models.keyedvectors import KeyedVectors

Expand Down Expand Up @@ -421,6 +422,9 @@ def __init__(

"""

self.load = call_on_class_only
Copy link
Owner

@piskvorky piskvorky Nov 3, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this will leave the attribute function unbound, leading to potential confusion / serialization issues.

Proper way would be to bind it properly with types.MethodType or somesuch.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the serialization tests pass, and the exception message is understandable if it's ever called (by mistake), I don't see what other clarity would come from binding the method. That'd just make it look more like an instance-specific operation... which it specifically is not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@piskvorky, that is what I did at the beginning. Apart from the points @gojomo raises, there are some issues with the way types.MethodTypes is handled in Python versions 2 and 3.

self.load_word2vec_format = call_on_class_only

if FAST_VERSION == -1:
logger.warning('Slow version of {0} is being used'.format(__name__))
else:
Expand Down
10 changes: 10 additions & 0 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def testfile():
# temporary data will be stored to this file
return os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.tst')

def load_on_instance():
# Save and load a Doc2Vec Model on instance for test
model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1)
model.save(testfile())
model = doc2vec.Doc2Vec() # should fail at this point
return model.load(testfile())

class TestDoc2VecModel(unittest.TestCase):
def test_persistence(self):
Expand Down Expand Up @@ -342,6 +348,10 @@ def testTrainWarning(self, l):
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))

def testLoadOnClassError(self):
"""Test if exception is raised when loading doc2vec model on instance"""
self.assertRaises(AttributeError, load_on_instance)
#endclass TestDoc2VecModel


Expand Down
16 changes: 12 additions & 4 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,17 @@ def testfile():
# temporary data will be stored to this file
return os.path.join(tempfile.gettempdir(), 'gensim_word2vec.tst')


def _rule(word, count, min_count):
if word == "human":
return utils.RULE_DISCARD # throw out
else:
return utils.RULE_DEFAULT # apply default rule, i.e. min_count

def load_on_instance():
# Save and load a Word2Vec Model on instance for test
model = word2vec.Word2Vec(sentences, min_count=1)
model.save(testfile())
model = word2vec.Word2Vec() # should fail at this point
return model.load(testfile())

class TestWord2VecModel(unittest.TestCase):
def testOnlineLearning(self):
Expand Down Expand Up @@ -585,14 +589,18 @@ def testTrainWarning(self, l):
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))
#endclass TestWord2VecModel


def test_sentences_should_not_be_a_generator(self):
"""
Is sentences a generator object?
"""
gen = (s for s in sentences)
self.assertRaises(TypeError, word2vec.Word2Vec, (gen,))

def testLoadOnClassError(self):
"""Test if exception is raised when loading word2vec model on instance"""
self.assertRaises(AttributeError, load_on_instance)
#endclass TestWord2VecModel

class TestWMD(unittest.TestCase):
def testNonzero(self):
Expand Down
4 changes: 4 additions & 0 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ def any2unicode(text, encoding='utf8', errors='strict'):
return unicode(text, encoding, errors=errors)
to_unicode = any2unicode

def call_on_class_only(*args, **kwargs):
"""Raise exception when load methods are called on instance"""
raise AttributeError('This method should be called on a class object.')


class SaveLoad(object):
"""
Expand Down