Skip to content

Commit

Permalink
Merge branch 'main' into upgrade-to-connexion3
Browse files Browse the repository at this point in the history
  • Loading branch information
juhoinkinen committed Nov 15, 2023
2 parents 9cd6a60 + ff1d32c commit 5d7ec95
Show file tree
Hide file tree
Showing 69 changed files with 1,533 additions and 586 deletions.
2 changes: 0 additions & 2 deletions .codecov.yml

This file was deleted.

9 changes: 5 additions & 4 deletions .github/actions/prepare/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ runs:
key: ignore-me
restore-keys: |
poetry-installation-and-cache-${{ inputs.python-version }}-${{ inputs.poetry-version }}-
- name: Install Poetry
shell: bash
run: |
pipx install poetry==${{ inputs.poetry-version }}
- name: Set up Python ${{ inputs.python-version }}
uses: actions/setup-python@5ccb29d8773c3f3f653e1705f474dfaa8a06a912 # v4.4.0
with:
python-version: ${{ inputs.python-version }}
- name: Install Poetry
shell: bash
run: |
pipx install poetry==${{ inputs.poetry-version }}
poetry env use ${{ inputs.python-version }}
51 changes: 45 additions & 6 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
env:
PIPX_HOME: "/home/runner/.cache/pipx"
PIPX_BIN_DIR: "/home/runner/.local/bin"
POETRY_VERSION: "1.4.1"
POETRY_VERSION: "1.5.1"
jobs:

lint:
Expand All @@ -36,12 +36,30 @@ jobs:
run: |
poetry run flake8
time-startup:
runs-on: ubuntu-22.04
name: check CLI startup time
steps:
- uses: actions/checkout@v3
- name: "Prepare: restore caches, install Poetry, set up Python"
id: prepare
uses: ./.github/actions/prepare
with:
python-version: "3.9"
poetry-version: ${{ env.POETRY_VERSION }}
- name: Install Python dependencies
run: |
poetry install
- name: Check startup time
run: |
poetry run tests/time-startup.sh
test:
runs-on: ubuntu-22.04
timeout-minutes: 15
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
name: test on Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
Expand All @@ -61,7 +79,7 @@ jobs:
# Selectively install the optional dependencies for some Python versions
# For Python 3.8:
if [[ ${{ matrix.python-version }} == '3.8' ]]; then
poetry install -E "nn omikuji yake voikko";
poetry install -E "nn omikuji yake voikko stwfsa";
fi
# For Python 3.9:
if [[ ${{ matrix.python-version }} == '3.9' ]]; then
Expand All @@ -71,7 +89,13 @@ jobs:
fi
# For Python 3.10:
if [[ ${{ matrix.python-version }} == '3.10' ]]; then
poetry install -E "nn omikuji yake";
poetry install -E "nn omikuji yake stwfsa";
fi
# For Python 3.11:
if [[ ${{ matrix.python-version }} == '3.11' ]]; then
poetry install -E "nn fasttext yake stwfsa voikko spacy";
# download the small English pretrained spaCy model needed by spacy analyzer
poetry run python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed
fi
poetry run python -m nltk.downloader punkt
- name: Test with pytest
Expand All @@ -93,9 +117,23 @@ jobs:
# A new key is created to update the cache if some dependency has been updated
key: poetry-installation-and-cache-${{ matrix.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}

test-docker-image:
name: "test Docker image"
runs-on: ubuntu-22.04
timeout-minutes: 15
steps:
- name: "Build image for testing"
uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0
with:
push: false
tags: test-image
- name: "Test with pytest"
run: |
docker run --rm --workdir /Annif test-image pytest -p no:cacheprovider
publish-docker-latest:
name: publish latest Docker image
needs: [lint, test]
needs: [lint, test, test-docker-image]
runs-on: ubuntu-22.04
timeout-minutes: 15
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Expand All @@ -122,7 +160,7 @@ jobs:

publish-release:
name: publish release
needs: [lint, test]
needs: [lint, test, test-docker-image]
runs-on: ubuntu-22.04
if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')
steps:
Expand Down Expand Up @@ -151,6 +189,7 @@ jobs:
with:
images: quay.io/natlibfi/annif
tags: |
type=semver,pattern={{version}},suffix=-{{date 'YYYYMMDD'}}
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- name: Build and push to Quay.io
Expand Down
39 changes: 39 additions & 0 deletions .github/workflows/docker-rebuild.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Manually triggered workflow (workflow_dispatch) that rebuilds the Annif
# Docker image, tests it, and republishes version-tagged images to Quay.io.
name: "Docker rebuild"
on: workflow_dispatch
jobs:
  rebuild-docker-images:
    name: "Docker rebuild"
    runs-on: ubuntu-22.04
    timeout-minutes: 15
    steps:
      - name: "Build for testing"
        uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0
        with:
          push: false
          tags: test-image
      - name: "Test with pytest"
        run: |
          docker run --rm --workdir /Annif test-image pytest -p no:cacheprovider
      - name: Login to Quay.io
        uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # v2.2.0
        with:
          registry: quay.io
          username: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_USERNAME }}
          password: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_PASSWORD }}
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@2c0bd771b40637d97bf205cbccdd294a32112176 # v4.5.0
        with:
          images: quay.io/natlibfi/annif
          # latest=false: a manual rebuild must not move the "latest" tag
          flavor: |
            latest=false
          tags: |
            type=semver,pattern={{version}},suffix=-{{date 'YYYYMMDD'}}
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
      - name: Build and push to Quay.io
        uses: docker/build-push-action@44ea916f6c540f9302d50c2b1e5a8dc071f15cdf # v4.1.0
        with:
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
7 changes: 3 additions & 4 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
version: 2

build:
os: "ubuntu-20.04"
os: "ubuntu-22.04"
tools:
python: "3.9"
python: "3.10"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand All @@ -27,10 +27,9 @@ python:
- nn
- omikuji
- fasttext
- stwfsa
- yake
- pycld3
- spacy
- requirements: docs/requirements.txt
- method: pip
path: .
system_packages: true
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ authors:
affiliation: "National Library of Finland"
title: "Annif"
abstract: "Annif is an automatic indexing software."
version: 1.0.0-dev
version: 1.1.0-dev
license:
- Apache-2.0
- GPL-3.0
date-released: 2023-04-18
date-released: 2023-08-18
doi: 10.5281/zenodo.2578948
repository-code: "https://github.com/NatLibFi/Annif"
contact:
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
FROM python:3.10-slim-bullseye
FROM python:3.10-slim-bookworm
LABEL org.opencontainers.image.authors="grp-natlibfi-annif@helsinki.fi"
SHELL ["/bin/bash", "-c"]

ARG optional_dependencies="fasttext voikko fasttext nn omikuji yake spacy"
ARG optional_dependencies="voikko fasttext nn omikuji yake spacy stwfsa"
ARG POETRY_VIRTUALENVS_CREATE=false

# Install system dependencies needed at runtime:
RUN apt-get update && \
RUN apt-get update && apt-get upgrade -y && \
if [[ $optional_dependencies =~ "voikko" ]]; then \
apt-get install -y --no-install-recommends \
libvoikko1 \
Expand Down
31 changes: 28 additions & 3 deletions annif/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
#!/usr/bin/env python3

from __future__ import annotations

import logging
import os
import os.path
from typing import TYPE_CHECKING

logging.basicConfig()
logger = logging.getLogger("annif")
logger.setLevel(level=logging.INFO)

import annif.backend # noqa

if TYPE_CHECKING:
from flask.app import Flask


def create_flask_app(config_name=None):
def create_flask_app(config_name: str | None = None) -> Flask:
"""Create a Flask app to be used by the CLI."""
from flask import Flask

_set_tensorflow_loglevel()

app = Flask(__name__)
config_name = _get_config_name(config_name)
logger.debug(f"creating flask app with configuration {config_name}")
Expand All @@ -23,7 +31,7 @@ def create_flask_app(config_name=None):
return app


def create_app(config_name=None):
def create_app(config_name: str | None = None) -> Flask:
"""Create a Connexion app to be used for the API."""
# 'cxapp' here is the Connexion application that has a normal Flask app
# as a property (cxapp.app)
Expand Down Expand Up @@ -62,7 +70,7 @@ def create_app(config_name=None):
return cxapp


def _get_config_name(config_name):
def _get_config_name(config_name: str | None) -> str:
if config_name is None:
config_name = os.environ.get("ANNIF_CONFIG")
if config_name is None:
Expand All @@ -71,3 +79,20 @@ def _get_config_name(config_name):
else:
config_name = "annif.default_config.ProductionConfig" # pragma: no cover
return config_name


def _set_tensorflow_loglevel() -> None:
    """Set TensorFlow log level based on Annif log level (--verbosity/-v
    option) using an environment variable. INFO messages by TF are shown only on
    DEBUG (or NOTSET) level of Annif."""
    annif_loglevel = logger.getEffectiveLevel()
    # Map the effective logging level to a TF_CPP_MIN_LOG_LEVEL value
    # ("0" = show everything ... "3" = fatal only). Threshold ranges are used
    # instead of an exact dict lookup so that custom in-between levels
    # (e.g. setLevel(25)) cannot raise KeyError; the result is unchanged for
    # the six standard levels (NOTSET/DEBUG -> "0", INFO/WARNING -> "1",
    # ERROR -> "2", CRITICAL -> "3").
    if annif_loglevel < logging.INFO:
        tf_loglevel = "0"
    elif annif_loglevel < logging.ERROR:
        tf_loglevel = "1"
    elif annif_loglevel < logging.CRITICAL:
        tf_loglevel = "2"
    else:
        tf_loglevel = "3"
    # setdefault: never override a value the user has set explicitly
    os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", tf_loglevel)
7 changes: 6 additions & 1 deletion annif/analyzer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
"""Collection of language-specific analyzers and analyzer registry for Annif"""
from __future__ import annotations

import re
from typing import TYPE_CHECKING

import annif
from annif.util import parse_args

from . import simple, simplemma, snowball

if TYPE_CHECKING:
from annif.analyzer.analyzer import Analyzer

_analyzers = {}


def register_analyzer(analyzer):
_analyzers[analyzer.name] = analyzer


def get_analyzer(analyzerspec):
def get_analyzer(analyzerspec: str) -> Analyzer:
match = re.match(r"(\w+)(\((.*)\))?", analyzerspec)
if match is None:
raise ValueError("Invalid analyzer specification {}".format(analyzerspec))
Expand Down
9 changes: 5 additions & 4 deletions annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Common functionality for analyzers."""
from __future__ import annotations

import abc
import functools
Expand All @@ -15,18 +16,18 @@ class Analyzer(metaclass=abc.ABCMeta):
name = None
token_min_length = 3 # default value, can be overridden in instances

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
if _KEY_TOKEN_MIN_LENGTH in kwargs:
self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])

def tokenize_sentences(self, text):
def tokenize_sentences(self, text: str) -> list[str]:
"""Tokenize a piece of text (e.g. a document) into sentences."""
import nltk.tokenize

return nltk.tokenize.sent_tokenize(text)

@functools.lru_cache(maxsize=50000)
def is_valid_token(self, word):
def is_valid_token(self, word: str) -> bool:
"""Return True if the word is an acceptable token."""
if len(word) < self.token_min_length:
return False
Expand All @@ -36,7 +37,7 @@ def is_valid_token(self, word):
return True
return False

def tokenize_words(self, text, filter=True):
def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
"""Tokenize a piece of text (e.g. a sentence) into words. If
filter=True (default), only return valid tokens (e.g. not
punctuation, numbers or very short words)"""
Expand Down
5 changes: 3 additions & 2 deletions annif/analyzer/simple.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""Simple analyzer for Annif. Only folds words to lower case."""
from __future__ import annotations

from . import analyzer


class SimpleAnalyzer(analyzer.Analyzer):
name = "simple"

def __init__(self, param, **kwargs):
def __init__(self, param: None, **kwargs) -> None:
self.param = param
super().__init__(**kwargs)

def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
return word.lower()
5 changes: 3 additions & 2 deletions annif/analyzer/simplemma.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Simplemma analyzer for Annif, based on simplemma lemmatizer."""
from __future__ import annotations

import simplemma

Expand All @@ -8,9 +9,9 @@
class SimplemmaAnalyzer(analyzer.Analyzer):
name = "simplemma"

def __init__(self, param, **kwargs):
def __init__(self, param: str, **kwargs) -> None:
self.lang = param
super().__init__(**kwargs)

def _normalize_word(self, word):
def _normalize_word(self, word: str) -> str:
return simplemma.lemmatize(word, lang=self.lang)
Loading

0 comments on commit 5d7ec95

Please sign in to comment.