Skip to content

Commit

Permalink
Feat/merge ignored nounphrases lists (#118)
Browse files (browse the repository at this point in the history)
* feat: merge ignored nounphrases

* fix: codestyle

* move ignore lists to a separate directory and expose the constants in the package `__init__`

* fix codestyle

* add docstring

* add ./common mount to spacy-nounphrases config

* feat: common for spacy-nounphrases

* fix: import

Co-authored-by: mtalimanchuk <mtalimanchuk@gmail.com>
  • Loading branch information
dilyararimovna and mtalimanchuk authored Mar 10, 2022
1 parent 10b2d7e commit 17d6e37
Show file tree
Hide file tree
Showing 12 changed files with 301 additions and 960 deletions.
249 changes: 0 additions & 249 deletions annotators/fact_retrieval/remove_lists.py

This file was deleted.

7 changes: 4 additions & 3 deletions annotators/fact_retrieval/tfidf_ranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from deeppavlov.core.models.estimator import Component
from deeppavlov.core.common.file import read_json
from deeppavlov.core.commands.utils import expand_path
-from remove_lists import NP_REMOVE_LIST, NP_IGNORE_LIST
+
+from common.ignore_lists import FALSE_POS_NPS_LIST, BAD_NPS_LIST

logger = getLogger(__name__)

Expand All @@ -50,10 +51,10 @@ def __init__(
freq_unigrams = f.read().splitlines()[:1000]

self.np_ignore_expr = re.compile(
-"(" + "|".join([r"\b%s\b" % word for word in NP_IGNORE_LIST + freq_unigrams]) + ")", re.IGNORECASE
+"(" + "|".join([r"\b%s\b" % word for word in BAD_NPS_LIST + freq_unigrams]) + ")", re.IGNORECASE
)
self.np_remove_expr = re.compile(
-"(" + "|".join([r"\b%s\b" % word for word in NP_REMOVE_LIST]) + ")", re.IGNORECASE
+"(" + "|".join([r"\b%s\b" % word for word in FALSE_POS_NPS_LIST]) + ")", re.IGNORECASE
)
self.rm_spaces_expr = re.compile(r"\s\s+")

Expand Down
8 changes: 6 additions & 2 deletions annotators/spacy_nounphrases/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
FROM python:3.8.4

RUN mkdir /src
-COPY ./requirements.txt /src/requirements.txt
+
+COPY ./annotators/spacy_nounphrases/ /src/
+COPY ./common/ /src/common/
+
+COPY ./annotators/spacy_nounphrases/requirements.txt /src/requirements.txt
RUN pip install -r /src/requirements.txt
RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
-COPY ./ /src/
+
WORKDIR /src

CMD gunicorn --workers=2 server:app
Loading

0 comments on commit 17d6e37

Please sign in to comment.