Skip to content

Commit

Permalink
Merge pull request #544 from NatLibFi/issue514-optimize-lazy-imports-take2
Browse files Browse the repository at this point in the history

Optimize startup time using local & lazy imports (take 2)
  • Loading branch information
osma authored Dec 20, 2021
2 parents fbd1f92 + 40c1884 commit 5c6af91
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 53 deletions.
3 changes: 2 additions & 1 deletion annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import abc
import functools
import unicodedata
import nltk.tokenize

# kwargs key under which the minimum accepted token length is passed
_KEY_TOKEN_MIN_LENGTH = 'token_min_length'

Expand All @@ -22,6 +21,7 @@ def __init__(self, **kwargs):

def tokenize_sentences(self, text):
    """Split a piece of text (e.g. a document) into a list of sentences."""
    # NLTK is imported lazily here so that merely importing this module
    # (and thus CLI startup) stays fast
    from nltk.tokenize import sent_tokenize
    return sent_tokenize(text)

@functools.lru_cache(maxsize=50000)
Expand All @@ -37,6 +37,7 @@ def is_valid_token(self, word):

def tokenize_words(self, text):
    """Split a piece of text (e.g. a sentence) into normalized word tokens.

    Tokens that fail is_valid_token() are dropped; the remaining raw tokens
    are passed through normalize_word()."""
    # NLTK is imported lazily to keep module import (and startup) fast
    from nltk.tokenize import word_tokenize
    words = []
    for token in word_tokenize(text):
        if self.is_valid_token(token):
            words.append(self.normalize_word(token))
    return words
Expand Down
2 changes: 1 addition & 1 deletion annif/analyzer/snowball.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Snowball analyzer for Annif, based on nltk Snowball stemmer."""

import functools
import nltk.stem.snowball
from . import analyzer


Expand All @@ -10,6 +9,7 @@ class SnowballAnalyzer(analyzer.Analyzer):

def __init__(self, param, **kwargs):
    """Set up a Snowball stemmer for the language given in param."""
    self.param = param
    # nltk is imported lazily so that importing this module stays cheap
    from nltk.stem.snowball import SnowballStemmer
    self.stemmer = SnowballStemmer(param)
    super().__init__(**kwargs)

Expand Down
138 changes: 88 additions & 50 deletions annif/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,99 @@
"""Registry of backend types for Annif"""

from . import dummy
from . import ensemble
from . import http
from . import tfidf
from . import pav
from . import stwfsa
from . import mllm
from . import svc
import annif

# define functions for lazily importing each backend (alphabetical order)
def _dummy():
    """Load and return the dummy backend class on demand."""
    from .dummy import DummyBackend
    return DummyBackend

_backend_types = {}

def _ensemble():
    """Load and return the ensemble backend class on demand."""
    from .ensemble import EnsembleBackend
    return EnsembleBackend

def register_backend(backend):
_backend_types[backend.name] = backend

def _fasttext():
try:
from . import fasttext
return fasttext.FastTextBackend
except ImportError:
raise ValueError("fastText not available, cannot use fasttext backend")

def get_backend(backend_id):

def _http():
    """Load and return the HTTP backend class on demand."""
    from .http import HTTPBackend
    return HTTPBackend


def _mllm():
    """Load and return the MLLM backend class on demand."""
    from .mllm import MLLMBackend
    return MLLMBackend


def _nn_ensemble():
try:
return _backend_types[backend_id]
except KeyError:
raise ValueError("No such backend type {}".format(backend_id))
from . import nn_ensemble
return nn_ensemble.NNEnsembleBackend
except ImportError:
raise ValueError("Keras and TensorFlow not available, cannot use " +
"nn_ensemble backend")


def _omikuji():
try:
from . import omikuji
return omikuji.OmikujiBackend
except ImportError:
raise ValueError("Omikuji not available, cannot use omikuji backend")


def _pav():
    """Load and return the PAV backend class on demand."""
    from .pav import PAVBackend
    return PAVBackend


register_backend(dummy.DummyBackend)
register_backend(ensemble.EnsembleBackend)
register_backend(http.HTTPBackend)
register_backend(tfidf.TFIDFBackend)
register_backend(pav.PAVBackend)
register_backend(stwfsa.StwfsaBackend)
register_backend(mllm.MLLMBackend)
register_backend(svc.SVCBackend)

# Optional backends
try:
from . import fasttext
register_backend(fasttext.FastTextBackend)
except ImportError:
annif.logger.debug("fastText not available, not enabling fasttext backend")

try:
from . import nn_ensemble
register_backend(nn_ensemble.NNEnsembleBackend)
except ImportError:
annif.logger.debug("Keras and TensorFlow not available, not enabling " +
"nn_ensemble backend")

try:
from . import omikuji
register_backend(omikuji.OmikujiBackend)
except ImportError:
annif.logger.debug("Omikuji not available, not enabling omikuji backend")

try:
from . import yake
register_backend(yake.YakeBackend)
except ImportError:
annif.logger.debug("YAKE not available, not enabling yake backend")
def _stwfsa():
    """Load and return the STWFSA backend class on demand."""
    from .stwfsa import StwfsaBackend
    return StwfsaBackend


def _svc():
    """Load and return the SVC backend class on demand."""
    from .svc import SVCBackend
    return SVCBackend


def _tfidf():
    """Load and return the TF-IDF backend class on demand."""
    from .tfidf import TFIDFBackend
    return TFIDFBackend


def _yake():
try:
from . import yake
return yake.YakeBackend
except ImportError:
raise ValueError("YAKE not available, cannot use yake backend")


# registry of the lazy loader functions above, keyed by backend name
_backend_fns = dict(
    dummy=_dummy,
    ensemble=_ensemble,
    fasttext=_fasttext,
    http=_http,
    mllm=_mllm,
    nn_ensemble=_nn_ensemble,
    omikuji=_omikuji,
    pav=_pav,
    stwfsa=_stwfsa,
    svc=_svc,
    tfidf=_tfidf,
    yake=_yake,
)


def get_backend(backend_id):
    """Return the backend class registered under the given identifier.

    Raises ValueError when the identifier is unknown."""
    loader = _backend_fns.get(backend_id)
    if loader is None:
        raise ValueError("No such backend type {}".format(backend_id))
    return loader()
3 changes: 2 additions & 1 deletion annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from flask.cli import FlaskGroup, ScriptInfo
import annif
import annif.corpus
import annif.eval
import annif.parallel
import annif.project
import annif.registry
Expand Down Expand Up @@ -89,6 +88,7 @@ def validate_backend_params(backend, beparam, project):


def generate_filter_batches(subjects):
import annif.eval
filter_batches = collections.OrderedDict()
for limit in range(1, BATCH_MAX_LIMIT + 1):
for threshold in [i * 0.05 for i in range(20)]:
Expand Down Expand Up @@ -347,6 +347,7 @@ def run_eval(
project = get_project(project_id)
backend_params = parse_backend_params(backend_param, project)

import annif.eval
eval_batch = annif.eval.EvaluationBatch(project.subjects)

if results_file:
Expand Down
33 changes: 33 additions & 0 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import annif
import annif.backend
import annif.corpus
import importlib.util


def test_get_backend_nonexistent():
Expand Down Expand Up @@ -50,3 +51,35 @@ def test_fill_params_with_defaults(project):
project=project)
expected_default_params = {'limit': 100}
assert expected_default_params == dummy.params


@pytest.mark.skipif(importlib.util.find_spec("fasttext") is not None,
                    reason="test requires that fastText is NOT installed")
def test_get_backend_fasttext_not_installed():
    # the lazy loader should turn the ImportError into a helpful ValueError
    with pytest.raises(ValueError, match='fastText not available'):
        annif.backend.get_backend('fasttext')


@pytest.mark.skipif(importlib.util.find_spec("tensorflow") is not None,
                    reason="test requires that TensorFlow is NOT installed")
def test_get_backend_nn_ensemble_not_installed():
    # the lazy loader should turn the ImportError into a helpful ValueError
    with pytest.raises(ValueError, match='TensorFlow not available'):
        annif.backend.get_backend('nn_ensemble')


@pytest.mark.skipif(importlib.util.find_spec("omikuji") is not None,
                    reason="test requires that Omikuji is NOT installed")
def test_get_backend_omikuji_not_installed():
    # the lazy loader should turn the ImportError into a helpful ValueError
    with pytest.raises(ValueError, match='Omikuji not available'):
        annif.backend.get_backend('omikuji')


@pytest.mark.skipif(importlib.util.find_spec("yake") is not None,
                    reason="test requires that YAKE is NOT installed")
def test_get_backend_yake_not_installed():
    # the lazy loader should turn the ImportError into a helpful ValueError
    with pytest.raises(ValueError, match='YAKE not available'):
        annif.backend.get_backend('yake')

0 comments on commit 5c6af91

Please sign in to comment.