Skip to content

Commit

Permalink
Refactor project dependencies (#2)
Browse files (browse the repository at this point in the history)
* Refactor project dependencies

* Install latest pip and poetry
  • Loading branch information
chomechome authored Sep 10, 2020
1 parent c6fd216 commit 7a44be7
Show file tree
Hide file tree
Showing 20 changed files with 342 additions and 1,682 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,3 +2,4 @@
*.joblib filter=lfs diff=lfs merge=lfs -text
*.crfsuite filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,8 +4,8 @@ python:
- 3.6

install:
- "pip install pip==18.1"
- "pip install poetry==0.12.10"
- "pip install -U pip"
- "pip install poetry"
- "poetry install --no-interaction"

jobs:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@ lint:
$(PYTHON) mypy $(CODE)

test:
$(PYTHON) pytest -n 8 --boxed tests
$(PYTHON) pytest tests

coverage:
$(PYTHON) pytest --cov=maru
Expand Down
13 changes: 6 additions & 7 deletions maru/resource/crf/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,24 @@
import functools
import os
import pathlib
from typing import Dict

import joblib
import pycrfsuite
from sklearn.externals import joblib

from maru.feature.extractor import IFeatureExtractor
from maru.tag import Tag

_get_path = functools.partial(os.path.join, os.path.dirname(__file__))
_DIRECTORY = pathlib.Path(__file__).parent.absolute()


def load_extractor() -> IFeatureExtractor:
return joblib.load(_get_path('extractor.joblib'))
return joblib.load(_DIRECTORY / 'extractor.joblib')


def load_tags() -> Dict[int, Tag]:
return joblib.load(_get_path('tags.joblib'))
return joblib.load(_DIRECTORY / 'tags.joblib')


def load_tagger() -> pycrfsuite.Tagger:
tagger = pycrfsuite.Tagger()
tagger.open(_get_path('tagger.crfsuite'))
tagger.open(str(_DIRECTORY / 'tagger.crfsuite'))
return tagger
20 changes: 11 additions & 9 deletions maru/resource/linear/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,37 @@
import functools
import gzip
import json
import os
import pathlib
from typing import Dict

import joblib
import numpy
from sklearn.externals import joblib

from maru.feature.extractor import IFeatureExtractor
from maru.feature.vocabulary import PositionalFeatureVocabulary
from maru.tag import Tag

_get_path = functools.partial(os.path.join, os.path.dirname(__file__))
_DIRECTORY = pathlib.Path(__file__).parent.absolute()


def load_extractor() -> IFeatureExtractor:
return joblib.load(_get_path('extractor.joblib'))
return joblib.load(_DIRECTORY / 'extractor.joblib')


def load_vocabulary() -> PositionalFeatureVocabulary:
with open(_get_path('vocabulary.json'), encoding='utf8') as f:
with (_DIRECTORY / 'vocabulary.json').open(encoding='utf8') as f:
data = {int(index): mapping for index, mapping in json.load(f).items()}
return PositionalFeatureVocabulary(data)


def load_tags() -> Dict[int, Tag]:
return joblib.load(_get_path('tags.joblib'))
return joblib.load(_DIRECTORY / 'tags.joblib')


def load_coefficients() -> numpy.array:
return joblib.load(_get_path('coefficients.joblib'))
with gzip.open(_DIRECTORY / 'coefficients.gz', 'rb') as data:
return numpy.load(data)


def load_intercept() -> numpy.array:
return joblib.load(_get_path('intercept.joblib'))
with gzip.open(_DIRECTORY / 'intercept.gz', 'rb') as data:
return numpy.load(data)
3 changes: 3 additions & 0 deletions maru/resource/linear/coefficients.gz
Git LFS file not shown
3 changes: 0 additions & 3 deletions maru/resource/linear/coefficients.joblib

This file was deleted.

3 changes: 3 additions & 0 deletions maru/resource/linear/intercept.gz
Git LFS file not shown
3 changes: 0 additions & 3 deletions maru/resource/linear/intercept.joblib

This file was deleted.

28 changes: 14 additions & 14 deletions maru/resource/rnn/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,42 +1,42 @@
import functools
import json
import os
import pathlib
from typing import Dict

import keras
import tensorflow
from sklearn.externals import joblib
import joblib
import tensorflow.keras

from maru.feature.extractor import IFeatureExtractor
from maru.feature.vocabulary import FeatureVocabulary
from maru.tag import Tag

_get_path = functools.partial(os.path.join, os.path.dirname(__file__))
_DIRECTORY = pathlib.Path(__file__).parent.absolute()


def load_extractor() -> IFeatureExtractor:
return joblib.load(_get_path('extractor.joblib'))
return joblib.load(_DIRECTORY / 'extractor.joblib')


def load_tags() -> Dict[int, Tag]:
return joblib.load(_get_path('tags.joblib'))
return joblib.load(_DIRECTORY / 'tags.joblib')


def load_tagger() -> keras.Model:
def load_tagger() -> tensorflow.keras.Model:
# this restrains tensorflow from allocating all of available GPU memory
config = tensorflow.ConfigProto()
config = tensorflow.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True

keras.backend.set_session(tensorflow.Session(config=config))
tensorflow.compat.v1.keras.backend.set_session(
tensorflow.compat.v1.Session(config=config)
)

return keras.models.load_model(_get_path('tagger.h5'))
return tensorflow.keras.models.load_model(_DIRECTORY / 'tagger.h5')


def load_char_vocabulary() -> FeatureVocabulary:
with open(_get_path('char_vocabulary.json'), encoding='utf8') as f:
with (_DIRECTORY / 'char_vocabulary.json').open(encoding='utf8') as f:
return FeatureVocabulary(json.load(f))


def load_grammeme_vocabulary() -> FeatureVocabulary:
with open(_get_path('grammeme_vocabulary.json'), encoding='utf8') as f:
with (_DIRECTORY / 'grammeme_vocabulary.json').open(encoding='utf8') as f:
return FeatureVocabulary(json.load(f))
2 changes: 1 addition & 1 deletion maru/tagger/rnn.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self, cache_size: Optional[int] = 15000):
self._tagger = rnn.load_tagger()
self._tags = rnn.load_tags()

_, _, max_word_length = self._tagger.get_layer(_CHAR_INPUT).input_shape
_, _, max_word_length = self._tagger.get_layer(_CHAR_INPUT).input_shape[0]

grammeme_vocabulary = rnn.load_grammeme_vocabulary()
char_vocabulary = rnn.load_char_vocabulary()
Expand Down
21 changes: 10 additions & 11 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "maru"
version = "0.1.2"
version = "0.1.3"
description = "Morphological Analyzer for Russian 💬"
license = "MIT"
authors = ["Vladislav Blinov <cunningplan@yandex.ru>"]
Expand All @@ -15,20 +15,19 @@ classifiers = [

[tool.poetry.dependencies]
python = "^3.6"
numpy = "^1.15.0"
pymorphy2 = { version = "^0.8", extras = [ "fast" ] }
scipy = "^1.1.0"
keras = "^2.2.2"
tensorflow = ">=1.9.0, <1.15.0"
scikit-learn = "^0.19.0"
python-crfsuite = "^0.9.5"
lru-dict = "^1.1.6"
tensorflow-gpu = { version = ">=1.9.0, <1.15.0", optional = true }
numpy = ">=1.15.0"
pymorphy2 = { version = ">=0.8", extras = [ "fast" ] }
scipy = ">=1.1.0"
keras = ">=2.2.2"
tensorflow = ">=1.14.0"
python-crfsuite = ">=0.9.5"
lru-dict = ">=1.1.6"
tensorflow-gpu = { version = ">=1.14.0", optional = true }
joblib = ">=0.11.0"

[tool.poetry.dev-dependencies]
pytest = "^5.2.2"
pytest-cov = "^2.8.1"
pytest-xdist = "^1.30.0"
mypy = "^0.740"
flake8 = "^3.7.9"
flake8-isort = "^2.7.0"
Expand Down
Loading

0 comments on commit 7a44be7

Please sign in to comment.