Skip to content

Commit

Permalink
Split up TFIDFBackend.train
Browse files Browse the repository at this point in the history
  • Loading branch information
osma committed Oct 4, 2019
1 parent b6dbbcd commit 0c6ee3b
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions annif/backend/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ def initialize(self):
self._initialize_vectorizer()
self._initialize_index()

def _create_index(self, veccorpus):
    """Build the similarity index for a vectorized corpus and save it.

    The sparse matrix ``veccorpus`` is wrapped as a gensim corpus, used to
    construct a ``SparseMatrixSimilarity`` index that is kept in
    ``self._index``, and then written to ``self.INDEX_FILE`` under
    ``self.datadir`` through ``annif.util.atomic_save``.
    """
    self.info('creating similarity index')
    # documents_columns=False: per the gensim docs, documents are then
    # taken from the rows of the matrix rather than its columns.
    wrapped_corpus = Sparse2Corpus(veccorpus, documents_columns=False)
    # The index needs as many features as the fitted vectorizer has terms.
    feature_count = len(self._vectorizer.vocabulary_)
    self._index = gensim.similarities.SparseMatrixSimilarity(
        wrapped_corpus,
        num_features=feature_count)
    annif.util.atomic_save(self._index, self.datadir, self.INDEX_FILE)

def train(self, corpus, project):
if corpus.is_empty():
raise NotSupportedException(
Expand All @@ -67,14 +77,7 @@ def train(self, corpus, project):
self.datadir,
self.VECTORIZER_FILE,
method=joblib.dump)
self.info('creating similarity index')
gscorpus = Sparse2Corpus(veccorpus, documents_columns=False)
self._index = gensim.similarities.SparseMatrixSimilarity(
gscorpus, num_features=len(self._vectorizer.vocabulary_))
annif.util.atomic_save(
self._index,
self.datadir,
self.INDEX_FILE)
self._create_index(veccorpus)

def _suggest(self, text, project, params):
self.debug('Suggesting subjects for text "{}..." (len={})'.format(
Expand Down

0 comments on commit 0c6ee3b

Please sign in to comment.