diff --git a/annif/backend/backend.py b/annif/backend/backend.py index 817b6b23f..93dbc0ead 100644 --- a/annif/backend/backend.py +++ b/annif/backend/backend.py @@ -12,13 +12,25 @@ class AnnifBackend(metaclass=abc.ABCMeta): needs_subject_index = False needs_subject_vectorizer = False - def __init__(self, backend_id, params, datadir): + DEFAULT_PARAMS = {'limit': 100} + + def __init__(self, backend_id, config_params, datadir): """Initialize backend with specific parameters. The parameters are a dict. Keys and values depend on the specific backend type.""" self.backend_id = backend_id - self.params = params self.datadir = datadir + self.config_params = config_params + + def default_params(self): + return self.DEFAULT_PARAMS + + @property + def params(self): + params = {} + params.update(self.default_params()) + params.update(self.config_params) + return params def train(self, corpus, project): """train the model on the given document or subject corpus""" diff --git a/annif/backend/dummy.py b/annif/backend/dummy.py index 1282eca33..2dcb99f35 100644 --- a/annif/backend/dummy.py +++ b/annif/backend/dummy.py @@ -11,6 +11,9 @@ class DummyBackend(backend.AnnifLearningBackend): uri = 'http://example.org/dummy' label = 'dummy' + def default_params(self): + return backend.AnnifBackend.DEFAULT_PARAMS + def initialize(self): self.initialized = True diff --git a/annif/backend/fasttext.py b/annif/backend/fasttext.py index 9cd52d544..a9561361d 100644 --- a/annif/backend/fasttext.py +++ b/annif/backend/fasttext.py @@ -33,12 +33,25 @@ class FastTextBackend(mixins.ChunkingBackend, backend.AnnifBackend): 't': float } + DEFAULT_PARAMS = { + 'dim': 100, + 'lr': 0.25, + 'epoch': 5, + 'loss': 'hs', + } + MODEL_FILE = 'fasttext-model' TRAIN_FILE = 'fasttext-train.txt' # defaults for uninitialized instances _model = None + def default_params(self): + params = backend.AnnifBackend.DEFAULT_PARAMS.copy() + params.update(mixins.ChunkingBackend.DEFAULT_PARAMS) + params.update(self.DEFAULT_PARAMS) + return params + def initialize(self): if self._model is None: path = os.path.join(self.datadir, self.MODEL_FILE) diff --git a/annif/backend/mixins.py b/annif/backend/mixins.py index 04774a58c..5cdc1dc06 100644 --- a/annif/backend/mixins.py +++ b/annif/backend/mixins.py @@ -8,6 +8,11 @@ class ChunkingBackend(metaclass=abc.ABCMeta): """Annif backend mixin that implements chunking of input""" + DEFAULT_PARAMS = {'chunksize': 1} + + def default_params(self): + return self.DEFAULT_PARAMS + @abc.abstractmethod def _suggest_chunks(self, chunktexts, project): """Suggest subjects for the chunked text; should be implemented by diff --git a/annif/backend/pav.py b/annif/backend/pav.py index 407f430a3..0232a2876 100644 --- a/annif/backend/pav.py +++ b/annif/backend/pav.py @@ -24,6 +24,8 @@ class PAVBackend(ensemble.EnsembleBackend): # defaults for uninitialized instances _models = None + DEFAULT_PARAMS = {'min-docs': 10} + def initialize(self): if self._models is not None: return # already initialized diff --git a/annif/backend/vw_ensemble.py b/annif/backend/vw_ensemble.py index 94cf28fa7..1d731f937 100644 --- a/annif/backend/vw_ensemble.py +++ b/annif/backend/vw_ensemble.py @@ -10,6 +10,7 @@ import numpy as np from annif.exception import NotInitializedException from annif.suggestion import VectorSuggestionResult +from . import backend from . import vw_base from . import ensemble @@ -42,7 +43,16 @@ class VWEnsembleBackend( # a simple mean of scores. A higher value will mean that the model # adapts quicker (and possibly makes more errors) while a lower value # will make it more careful so that it will require more training data. - DEFAULT_DISCOUNT_RATE = 0.01 + + DEFAULT_PARAMS = {'discount_rate': 0.01} + + def default_params(self): + params = backend.AnnifBackend.DEFAULT_PARAMS.copy() + params.update(self.DEFAULT_PARAMS) + params.update({param: default_val + for param, (_, default_val) in self.VW_PARAMS.items() + if default_val is not None}) + return params def _load_subject_freq(self): path = os.path.join(self.datadir, self.FREQ_FILE) @@ -75,8 +85,7 @@ def _calculate_scores(self, subj_id, subj_score_vector): def _merge_hits_from_sources(self, hits_from_sources, project, params): score_vector = np.array([hits.vector for hits, _ in hits_from_sources]) - discount_rate = float(self.params.get('discount_rate', - self.DEFAULT_DISCOUNT_RATE)) + discount_rate = float(self.params['discount_rate']) result = np.zeros(score_vector.shape[1]) for subj_id in range(score_vector.shape[1]): subj_score_vector = score_vector[:, subj_id] diff --git a/annif/backend/vw_multi.py b/annif/backend/vw_multi.py index 3c3aef9e7..63cef305d 100644 --- a/annif/backend/vw_multi.py +++ b/annif/backend/vw_multi.py @@ -7,6 +7,7 @@ from annif.suggestion import ListSuggestionResult, VectorSuggestionResult from annif.exception import ConfigurationException from . import vw_base +from . import backend from . import mixins @@ -27,14 +28,24 @@ class VWMultiBackend(mixins.ChunkingBackend, vw_base.VWBaseBackend): 'probabilities': (bool, None) } - DEFAULT_ALGORITHM = 'oaa' SUPPORTED_ALGORITHMS = ('oaa', 'ect', 'log_multi', 'multilabel_oaa') DEFAULT_INPUTS = '_text_' + DEFAULT_PARAMS = {'algorithm': 'oaa'} + + def default_params(self): + params = backend.AnnifBackend.DEFAULT_PARAMS.copy() + params.update(mixins.ChunkingBackend.DEFAULT_PARAMS) + params.update(self.DEFAULT_PARAMS) + params.update({param: default_val + for param, (_, default_val) in self.VW_PARAMS.items() + if default_val is not None}) + return params + @property def algorithm(self): - algorithm = self.params.get('algorithm', self.DEFAULT_ALGORITHM) + algorithm = self.params['algorithm'] if algorithm not in self.SUPPORTED_ALGORITHMS: raise ConfigurationException( "{} is not a valid algorithm (allowed: {})".format( diff --git a/annif/project.py b/annif/project.py index 34af903dd..7b9b5a909 100644 --- a/annif/project.py +++ b/annif/project.py @@ -45,7 +45,7 @@ class AnnifProject(DatadirMixin): def __init__(self, project_id, config, datadir): DatadirMixin.__init__(self, datadir, 'projects', project_id) self.project_id = project_id - self.name = config['name'] + self.name = config.get('name', project_id) self.language = config['language'] self.analyzer_spec = config.get('analyzer', None) self.vocab_id = config.get('vocab', None) @@ -144,7 +144,8 @@ def backend(self): try: backend_class = annif.backend.get_backend(backend_id) self._backend = backend_class( - backend_id, params=self.config, datadir=self.datadir) + backend_id, config_params=self.config, + datadir=self.datadir) except ValueError: logger.warning( "Could not create backend %s, " diff --git a/tests/projects.cfg b/tests/projects.cfg index eaccea36f..5e7d16733 100644 --- a/tests/projects.cfg +++ b/tests/projects.cfg @@ -50,6 +50,26 @@ language=en vocab=dummy analyzer=snowball(english) +[noname] +language=en +backend=tfidf +vocab=dummy +analyzer=snowball(english) + +[noparams-tfidf-fi] +name=TF-IDF Finnish using default params +language=fi +backend=tfidf +analyzer=snowball(finnish) +vocab=yso-fi + +[noparams-fasttext-fi] +name=fastText Finnish using default params +language=fi +backend=fasttext +analyzer=snowball(finnish) +vocab=yso-fi + [pav] name=PAV Ensemble Finnish language=fi diff --git a/tests/test_backend.py b/tests/test_backend.py index 988cad111..104b0e7c6 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -1,6 +1,7 @@ """Unit tests for backends in Annif""" import pytest +import logging import annif import annif.backend import annif.corpus @@ -13,7 +14,7 @@ def test_get_backend_nonexistent(): def test_get_backend_dummy(app, project): dummy_type = annif.backend.get_backend("dummy") - dummy = dummy_type(backend_id='dummy', params={}, + dummy = dummy_type(backend_id='dummy', config_params={}, datadir=app.config['DATADIR']) result = dummy.suggest(text='this is some text', project=project) assert len(result) == 1 @@ -24,7 +25,7 @@ def test_get_backend_dummy(app, project): def test_learn_dummy(app, project, tmpdir): dummy_type = annif.backend.get_backend("dummy") - dummy = dummy_type(backend_id='dummy', params={}, + dummy = dummy_type(backend_id='dummy', config_params={}, datadir=app.config['DATADIR']) tmpdir.join('doc1.txt').write('doc1') @@ -40,3 +41,11 @@ def test_learn_dummy(app, project, tmpdir): assert result[0].uri == 'http://example.org/key1' assert result[0].label == 'key1' assert result[0].score == 1.0 + + +def test_fill_params_with_defaults(app): + dummy_type = annif.backend.get_backend('dummy') + dummy = dummy_type(backend_id='dummy', config_params={}, + datadir=app.config['DATADIR']) + expected_default_params = {'limit': 100} # From AnnifBackend class + assert expected_default_params == dummy.params diff --git a/tests/test_backend_fasttext.py b/tests/test_backend_fasttext.py index 8afe14b25..25d1ada8a 100644 --- a/tests/test_backend_fasttext.py +++ b/tests/test_backend_fasttext.py @@ -8,11 +8,31 @@ fasttext = pytest.importorskip("annif.backend.fasttext") +def test_fasttext_default_params(datadir, project): + fasttext_type = annif.backend.get_backend("fasttext") + fasttext = fasttext_type( + backend_id='fasttext', + config_params={}, + datadir=str(datadir)) + + expected_default_params = { + 'limit': 100, + 'chunksize': 1, + 'dim': 100, + 'lr': 0.25, + 'epoch': 5, + 'loss': 'hs', + } + actual_params = fasttext.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_fasttext_train(datadir, document_corpus, project): fasttext_type = annif.backend.get_backend("fasttext") fasttext = fasttext_type( backend_id='fasttext', - params={ + config_params={ 'limit': 50, 'dim': 100, 'lr': 0.25, @@ -30,7 +50,7 @@ def test_fasttext_train_unknown_subject(tmpdir, datadir, project): fasttext_type = annif.backend.get_backend("fasttext") fasttext = fasttext_type( backend_id='fasttext', - params={ + config_params={ 'limit': 50, 'dim': 100, 'lr': 0.25, @@ -53,7 +73,7 @@ def test_fasttext_train_nodocuments(tmpdir, datadir, project): fasttext_type = annif.backend.get_backend("fasttext") fasttext = fasttext_type( backend_id='fasttext', - params={ + config_params={ 'limit': 50, 'dim': 100, 'lr': 0.25, @@ -73,7 +93,7 @@ def test_fasttext_suggest(datadir, project): fasttext_type = annif.backend.get_backend("fasttext") fasttext = fasttext_type( backend_id='fasttext', - params={ + config_params={ 'limit': 50, 'chunksize': 1, 'dim': 100, diff --git a/tests/test_backend_http.py b/tests/test_backend_http.py index ff601a504..aa634b815 100644 --- a/tests/test_backend_http.py +++ b/tests/test_backend_http.py @@ -17,7 +17,7 @@ def test_http_suggest(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/analyze', 'project': 'dummy'}, datadir=app.config['DATADIR']) @@ -40,7 +40,7 @@ def test_http_suggest_with_results(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/dummy/analyze', }, datadir=app.config['DATADIR']) @@ -63,7 +63,7 @@ def test_http_suggest_zero_score(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/analyze', 'project': 'dummy'}, datadir=app.config['DATADIR']) @@ -79,7 +79,7 @@ def test_http_suggest_error(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/analyze', 'project': 'dummy'}, datadir=app.config['DATADIR']) @@ -98,7 +98,7 @@ def test_http_suggest_json_fails(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/analyze', 'project': 'dummy'}, datadir=app.config['DATADIR']) @@ -117,7 +117,7 @@ def test_http_suggest_unexpected_json(app, project): http_type = annif.backend.get_backend("http") http = http_type( backend_id='http', - params={ + config_params={ 'endpoint': 'http://api.example.org/analyze', 'project': 'dummy'}, datadir=app.config['DATADIR']) diff --git a/tests/test_backend_pav.py b/tests/test_backend_pav.py index 36aaed11b..d2810d4fe 100644 --- a/tests/test_backend_pav.py +++ b/tests/test_backend_pav.py @@ -6,11 +6,26 @@ from annif.exception import NotSupportedException +def test_pav_default_params(datadir, document_corpus, project): + pav_type = annif.backend.get_backend("pav") + pav = pav_type( + backend_id='pav', + config_params={}, + datadir=str(datadir)) + + expected_default_params = { + 'min-docs': 10, + } + actual_params = pav.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_pav_train(app, datadir, tmpdir, project): pav_type = annif.backend.get_backend("pav") pav = pav_type( backend_id='pav', - params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, + config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, datadir=str(datadir)) tmpfile = tmpdir.join('document.tsv') @@ -29,7 +44,7 @@ def test_pav_train_nodocuments(tmpdir, datadir, project): pav_type = annif.backend.get_backend("pav") pav = pav_type( backend_id='pav', - params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, + config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, datadir=str(datadir)) empty_file = tmpdir.ensure('empty.tsv') @@ -44,7 +59,7 @@ def test_pav_initialize(app, datadir): pav_type = annif.backend.get_backend("pav") pav = pav_type( backend_id='pav', - params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, + config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, datadir=str(datadir)) assert pav._models is None @@ -60,7 +75,7 @@ def test_pav_suggest(app, datadir, project): pav_type = annif.backend.get_backend("pav") pav = pav_type( backend_id='pav', - params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, + config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'}, datadir=str(datadir)) results = pav.suggest("""Arkeologiaa sanotaan joskus myös diff --git a/tests/test_backend_tfidf.py b/tests/test_backend_tfidf.py index 4281e4892..238909c99 100644 --- a/tests/test_backend_tfidf.py +++ b/tests/test_backend_tfidf.py @@ -18,11 +18,26 @@ def project(document_corpus): return proj +def test_tfidf_default_params(datadir, project): + tfidf_type = annif.backend.get_backend("tfidf") + tfidf = tfidf_type( + backend_id='tfidf', + config_params={}, + datadir=str(datadir)) + + expected_default_params = { + 'limit': 100 # From AnnifBackend class + } + actual_params = tfidf.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_tfidf_train(datadir, document_corpus, project): tfidf_type = annif.backend.get_backend("tfidf") tfidf = tfidf_type( backend_id='tfidf', - params={'limit': 10}, + config_params={'limit': 10}, datadir=str(datadir)) tfidf.train(document_corpus, project) @@ -35,7 +50,7 @@ def test_tfidf_suggest(datadir, project): tfidf_type = annif.backend.get_backend("tfidf") tfidf = tfidf_type( backend_id='tfidf', - params={'limit': 10}, + config_params={'limit': 10}, datadir=str(datadir)) results = tfidf.suggest("""Arkeologiaa sanotaan joskus myös @@ -55,7 +70,7 @@ def test_tfidf_suggest_unknown(datadir, project): tfidf_type = annif.backend.get_backend("tfidf") tfidf = tfidf_type( backend_id='tfidf', - params={'limit': 10}, + config_params={'limit': 10}, datadir=str(datadir)) results = tfidf.suggest("abcdefghijk", project) # unknown word diff --git a/tests/test_backend_vw_ensemble.py b/tests/test_backend_vw_ensemble.py index 23b48ff53..46386ffe8 100644 --- a/tests/test_backend_vw_ensemble.py +++ b/tests/test_backend_vw_ensemble.py @@ -11,11 +11,28 @@ pytest.importorskip("annif.backend.vw_ensemble") +def test_vw_ensemble_default_params(datadir, project): + vw_type = annif.backend.get_backend("vw_ensemble") + vw = vw_type( + backend_id='vw_ensemble', + config_params={}, + datadir=str(datadir)) + + expected_default_params = { + 'limit': 100, + 'discount_rate': 0.01, + 'loss_function': 'squared', + } + actual_params = vw.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_vw_ensemble_suggest_no_model(datadir, project): vw_ensemble_type = annif.backend.get_backend('vw_ensemble') vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) with pytest.raises(NotInitializedException): @@ -26,7 +43,7 @@ def test_vw_ensemble_train_and_learn(app, datadir, tmpdir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) tmpfile = tmpdir.join('document.tsv') @@ -67,7 +84,7 @@ def test_vw_ensemble_initialize(app, datadir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) assert vw_ensemble._model is None @@ -83,7 +100,7 @@ def test_vw_ensemble_suggest(app, datadir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) project = annif.project.get_project('dummy-en') @@ -103,7 +120,7 @@ def test_vw_ensemble_suggest_set_discount_rate(app, datadir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en', 'discount_rate': '0.02'}, + config_params={'sources': 'dummy-en', 'discount_rate': '0.02'}, datadir=str(datadir)) project = annif.project.get_project('dummy-en') @@ -122,7 +139,7 @@ def test_vw_ensemble_format_example(datadir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) ex = vw_ensemble._format_example(0, [0.5]) @@ -133,7 +150,7 @@ def test_vw_ensemble_format_example_avoid_sci_notation(datadir): vw_ensemble_type = annif.backend.get_backend("vw_ensemble") vw_ensemble = vw_ensemble_type( backend_id='vw_ensemble', - params={'sources': 'dummy-en'}, + config_params={'sources': 'dummy-en'}, datadir=str(datadir)) ex = vw_ensemble._format_example(0, [7.24e-05]) diff --git a/tests/test_backend_vw_multi.py b/tests/test_backend_vw_multi.py index 88e6bbcf7..4d83268e3 100644 --- a/tests/test_backend_vw_multi.py +++ b/tests/test_backend_vw_multi.py @@ -18,11 +18,29 @@ def vw_corpus(tmpdir): return annif.corpus.DocumentFile(str(tmpfile)) +def test_vw_multi_default_params(datadir, project): + vw_type = annif.backend.get_backend("vw_multi") + vw = vw_type( + backend_id='vw_multi', + config_params={}, + datadir=str(datadir)) + + expected_default_params = { + 'limit': 100, + 'chunksize': 1, + 'algorithm': 'oaa', + 'loss_function': 'logistic', + } + actual_params = vw.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_vw_multi_suggest_no_model(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4}, + config_params={'chunksize': 4}, datadir=str(datadir)) with pytest.raises(NotInitializedException): @@ -33,7 +51,7 @@ def test_vw_multi_train_and_learn(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'loss_function': 'hinge'}, @@ -59,7 +77,7 @@ def test_vw_multi_train_and_learn_nodocuments(datadir, tmpdir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'loss_function': 'hinge'}, @@ -87,7 +105,7 @@ def test_vw_multi_train_from_project(app, datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'inputs': '_text_,dummy-en'}, datadir=str(datadir)) @@ -103,7 +121,7 @@ def test_vw_multi_train_multiple_passes(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'passes': 2}, @@ -119,7 +137,7 @@ def test_vw_multi_train_invalid_algorithm(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'algorithm': 'invalid'}, @@ -133,7 +151,7 @@ def test_vw_multi_train_invalid_loss_function(datadir, project, vw_corpus): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4, 'loss_function': 'invalid'}, + config_params={'chunksize': 4, 'loss_function': 'invalid'}, datadir=str(datadir)) with pytest.raises(ConfigurationException): @@ -144,7 +162,7 @@ def test_vw_multi_train_invalid_learning_rate(datadir, project, vw_corpus): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4, 'learning_rate': 'high'}, + config_params={'chunksize': 4, 'learning_rate': 'high'}, datadir=str(datadir)) with pytest.raises(ConfigurationException): @@ -155,7 +173,7 @@ def test_vw_multi_suggest(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4, 'probabilities': 1}, + config_params={'chunksize': 4, 'probabilities': 1}, datadir=str(datadir)) results = vw.suggest("""Arkeologiaa sanotaan joskus myös @@ -175,7 +193,7 @@ def test_vw_multi_suggest_empty(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4}, + config_params={'chunksize': 4}, datadir=str(datadir)) results = vw.suggest("...", project) @@ -187,7 +205,7 @@ def test_vw_multi_suggest_multiple_passes(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 4, 'passes': 2}, + config_params={'chunksize': 4, 'passes': 2}, datadir=str(datadir)) results = vw.suggest("...", project) @@ -199,7 +217,7 @@ def test_vw_multi_train_ect(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'algorithm': 'ect'}, @@ -215,8 +233,8 @@ def test_vw_multi_suggest_ect(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 1, - 'algorithm': 'ect'}, + config_params={'chunksize': 1, + 'algorithm': 'ect'}, datadir=str(datadir)) results = vw.suggest("""Arkeologiaa sanotaan joskus myös @@ -233,7 +251,7 @@ def test_vw_multi_train_log_multi(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'algorithm': 'log_multi'}, @@ -249,8 +267,8 @@ def test_vw_multi_suggest_log_multi(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 1, - 'algorithm': 'log_multi'}, + config_params={'chunksize': 1, + 'algorithm': 'log_multi'}, datadir=str(datadir)) results = vw.suggest("""Arkeologiaa sanotaan joskus myös @@ -267,7 +285,7 @@ def test_vw_multi_train_multilabel_oaa(datadir, document_corpus, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={ + config_params={ 'chunksize': 4, 'learning_rate': 0.5, 'algorithm': 'multilabel_oaa'}, @@ -283,8 +301,8 @@ def test_vw_multi_suggest_multilabel_oaa(datadir, project): vw_type = annif.backend.get_backend('vw_multi') vw = vw_type( backend_id='vw_multi', - params={'chunksize': 1, - 'algorithm': 'multilabel_oaa'}, + config_params={'chunksize': 1, + 'algorithm': 'multilabel_oaa'}, datadir=str(datadir)) results = vw.suggest("""Arkeologiaa sanotaan joskus myös diff --git a/tests/test_project.py b/tests/test_project.py index 787aa940a..a6604c219 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -89,6 +89,38 @@ def test_get_project_nobackend(app): backend = project.backend +def test_get_project_noname(app): + with app.app_context(): + project = annif.project.get_project('noname') + assert project.name == project.project_id + + +def test_get_project_default_params_tfidf(app): + with app.app_context(): + project = annif.project.get_project('noparams-tfidf-fi') + expected_default_params = { + 'limit': 100 # From AnnifBackend class + } + actual_params = project.backend.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + +def test_get_project_default_params_fasttext(app): + pytest.importorskip("annif.backend.fasttext") + with app.app_context(): + project = annif.project.get_project('noparams-fasttext-fi') + expected_default_params = { + 'limit': 100, # From AnnifBackend class + 'dim': 100, # Rest from FastTextBackend class + 'lr': 0.25, + 'epoch': 5, + 'loss': 'hs'} + actual_params = project.backend.params + for param, val in expected_default_params.items(): + assert param in actual_params and actual_params[param] == val + + def test_get_project_invalid_config_file(app): app = annif.create_app( config_name='annif.default_config.TestingInvalidProjectsConfig')