Skip to content

Commit

Permalink
Merge pull request #233 from NatLibFi/issue229-fasttext-optional
Browse files (browse the repository at this point in the history)
Make fastText an optional dependency
  • Loading branch information
osma authored Jan 15, 2019
2 parents 8c8e1d4 + 1c0dfba commit a716105
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 15 deletions.
6 changes: 4 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ install:
- pip install pipenv
- pip install --upgrade pytest
- pipenv install --dev --skip-lock
# install optional dependencies that were not specified in Pipfile
- pip install voikko
# For Python 3.5, also install optional dependencies that were not specified in Pipfile
# For other Python versions we will only run the tests that depend on pure Python modules
- if [[ $TRAVIS_PYTHON_VERSION == '3.5' ]]; then pip install fasttextmirror; fi
- if [[ $TRAVIS_PYTHON_VERSION == '3.5' ]]; then pip install voikko; fi
- travis_wait 30 python -m nltk.downloader punkt
script:
- pytest --cov=./
Expand Down
1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ click-log = "*"
nltk = "*"
gensim = "*"
sklearn = "*"
fasttextmirror = "*"
rdflib = "*"

[requires]
1 change: 0 additions & 1 deletion annif/analyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def get_analyzer(analyzerspec):

# Optional analyzers
try:
import voikko as _voikko
from . import voikko
register_analyzer(voikko.VoikkoAnalyzer)
except ImportError:
Expand Down
10 changes: 8 additions & 2 deletions annif/backend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from . import ensemble
from . import http
from . import tfidf
from . import fasttext
from . import pav
import annif


_backend_types = {}
Expand All @@ -28,5 +28,11 @@ def get_backend(backend_id):
register_backend(ensemble.EnsembleBackend)
register_backend(http.HTTPBackend)
register_backend(tfidf.TFIDFBackend)
register_backend(fasttext.FastTextBackend)
register_backend(pav.PAVBackend)

# Optional backends
try:
from . import fasttext
register_backend(fasttext.FastTextBackend)
except ImportError:
annif.logger.debug("fastText not available, not enabling fasttext backend")
25 changes: 18 additions & 7 deletions annif/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class AnnifProject:

# defaults for uninitialized instances
_analyzer = None
_backend = None
_vocab = None
_vectorizer = None
initialized = False
Expand All @@ -36,7 +37,7 @@ def __init__(self, project_id, config, datadir):
self.vocab_id = config.get('vocab', None)
self._base_datadir = datadir
self._datadir = os.path.join(datadir, 'projects', self.project_id)
self.backend = self._setup_backend(config)
self.config = config

def _get_datadir(self):
"""return the path of the directory where this project can store its
Expand All @@ -45,11 +46,6 @@ def _get_datadir(self):
os.makedirs(self._datadir)
return self._datadir

def _setup_backend(self, config):
    """Create the backend instance selected by the project configuration.

    The backend identifier is read from config['backend'], the matching
    backend class is looked up with annif.backend.get_backend, and that
    class is instantiated with the whole config as its params and this
    project's data directory as its datadir.
    """
    backend_id = config['backend']
    backend_class = annif.backend.get_backend(backend_id)
    return backend_class(
        backend_id,
        params=config,
        datadir=self._datadir)

def _initialize_analyzer(self):
analyzer = self.analyzer
logger.debug("Project '%s': initialized analyzer: %s",
Expand All @@ -76,6 +72,9 @@ def _initialize_vectorizer(self):

def _initialize_backend(self):
logger.debug("Project '%s': initializing backend", self.project_id)
if not self.backend:
logger.debug("Cannot initialize backend: does not exist")
return
try:
self.backend.initialize()
except AnnifException as err:
Expand Down Expand Up @@ -109,6 +108,18 @@ def analyzer(self):
self._analyzer = annif.analyzer.get_analyzer(self.analyzer_spec)
return self._analyzer

@property
def backend(self):
    """Return the backend of this project, creating it on first access.

    The instance is cached in self._backend. If looking up or constructing
    the backend raises ValueError, the failure is logged at debug level and
    None is returned; nothing is cached in that case, so the next access
    tries again.
    """
    # Fast path: the backend has already been created and cached.
    if self._backend is not None:
        return self._backend

    backend_id = self.config['backend']
    try:
        backend_class = annif.backend.get_backend(backend_id)
        self._backend = backend_class(
            backend_id, params=self.config, datadir=self._datadir)
    except ValueError:
        logger.debug("Could not create backend %s", backend_id)
    return self._backend

@property
def vocab(self):
if self._vocab is None:
Expand Down Expand Up @@ -172,7 +183,7 @@ def dump(self):
return {'project_id': self.project_id,
'name': self.name,
'language': self.language,
'backend': {'backend_id': self.backend.backend_id}
'backend': {'backend_id': self.config['backend']}
}


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def read(fname):
'nltk',
'gensim',
'sklearn',
'fasttextmirror',
'rdflib'],
extras_require={
'fasttext': ['fasttextmirror'],
'voikko': ['voikko'],
},
entry_points={
Expand Down
2 changes: 1 addition & 1 deletion tests/test_analyzer_voikko.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
import annif.analyzer

voikko = pytest.importorskip("voikko")
voikko = pytest.importorskip("annif.analyzer.voikko")


def test_voikko_getstate():
Expand Down
3 changes: 3 additions & 0 deletions tests/test_backend_fasttext.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""Unit tests for the fastText backend in Annif"""

import pytest
import annif.backend
import annif.corpus

fasttext = pytest.importorskip("annif.backend.fasttext")


def test_fasttext_load_documents(datadir, document_corpus, project):
fasttext_type = annif.backend.get_backend("fasttext")
Expand Down
2 changes: 2 additions & 0 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_project_load_documents_tfidf(app, document_corpus, testdatadir):


def test_project_load_vocabulary_fasttext(app, vocabulary, testdatadir):
pytest.importorskip("annif.backend.fasttext")
with app.app_context():
project = annif.project.get_project('fasttext-fi')
project.vocab.load_vocabulary(vocabulary)
Expand All @@ -79,6 +80,7 @@ def test_project_load_vocabulary_fasttext(app, vocabulary, testdatadir):


def test_project_load_documents_fasttext(app, document_corpus, testdatadir):
pytest.importorskip("annif.backend.fasttext")
with app.app_context():
project = annif.project.get_project('fasttext-fi')
project.load_documents(document_corpus)
Expand Down

0 comments on commit a716105

Please sign in to comment.