You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
As pointed out in #303, it's impossible to train from a directory with .txt and .key files which contain only subject labels but no URIs. Example traceback with tfidf backend:
Traceback (most recent call last):
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/bin/annif", line 11, in <module>
load_entry_point('annif', 'console_scripts', 'annif')()
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 764, in __call__
return self.main(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/flask/cli.py", line 569, in main
return super(FlaskGroup, self).main(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 717, in main
rv = self.invoke(ctx)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 1137, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 956, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 555, in invoke
return callback(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/decorators.py", line 17, in new_func
return f(get_current_context(), *args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/flask/cli.py", line 419, in decorator
return __ctx.invoke(f, *args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 555, in invoke
return callback(*args, **kwargs)
File "/home/local/oisuomin/git/Annif/annif/cli.py", line 154, in run_train
proj.train(documents)
File "/home/local/oisuomin/git/Annif/annif/project.py", line 197, in train
self._create_vectorizer(corpus)
File "/home/local/oisuomin/git/Annif/annif/project.py", line 186, in _create_vectorizer
self._vectorizer.fit((subj.text for subj in subjectcorpus.subjects))
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/sklearn/feature_extraction/text.py", line 1631, in fit
X = super().fit_transform(raw_documents)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/sklearn/feature_extraction/text.py", line 1058, in fit_transform
self.fixed_vocabulary_)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/sklearn/feature_extraction/text.py", line 989, in _count_vocab
raise ValueError("empty vocabulary; perhaps the documents only"
ValueError: empty vocabulary; perhaps the documents only contain stop words
With fasttext backend:
Traceback (most recent call last):
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/bin/annif", line 11, in <module>
load_entry_point('annif', 'console_scripts', 'annif')()
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 764, in __call__
return self.main(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/flask/cli.py", line 569, in main
return super(FlaskGroup, self).main(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 717, in main
rv = self.invoke(ctx)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 1137, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 956, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 555, in invoke
return callback(*args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/decorators.py", line 17, in new_func
return f(get_current_context(), *args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/flask/cli.py", line 419, in decorator
return __ctx.invoke(f, *args, **kwargs)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/click/core.py", line 555, in invoke
return callback(*args, **kwargs)
File "/home/local/oisuomin/git/Annif/annif/cli.py", line 154, in run_train
proj.train(documents)
File "/home/local/oisuomin/git/Annif/annif/project.py", line 198, in train
self.backend.train(corpus, project=self)
File "/home/local/oisuomin/git/Annif/annif/backend/fasttext.py", line 108, in train
self._create_model()
File "/home/local/oisuomin/git/Annif/annif/backend/fasttext.py", line 103, in _create_model
self._model = fastText.train_supervised(trainpath, **params)
File "/home/oisuomin/.local/share/virtualenvs/Annif-G8ShVyyO/lib/python3.5/site-packages/fastText/FastText.py", line 343, in train_supervised
fasttext.train(ft.f, a)
ValueError: Empty vocabulary. Try a smaller -minCount value.
The problem seems to be that subject labels are not being converted to URIs internally, although they should be.
The text was updated successfully, but these errors were encountered:
As pointed out in #303, it's impossible to train from a directory with `.txt` and `.key` files which contain only subject labels but no URIs. Example traceback with `tfidf` backend:
With `fasttext` backend:
The problem seems to be that subject labels are not being converted to URIs internally, although they should be.
The text was updated successfully, but these errors were encountered: