Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update custom_el #481

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions annotators/custom_entity_linking/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
FROM tensorflow/tensorflow:1.15.2-gpu

WORKDIR /src

RUN apt-key del 7fa2af80 && \
# install cuda packages
rm -f /etc/apt/sources.list.d/cuda*.list && \
curl https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb \
-o cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb
RUN apt-get -y update
RUN apt-get install -y build-essential zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm \
libncurses5-dev libncursesw5-dev xz-utils libffi-dev liblzma-dev

# install auxiliary packages for sqlite3
RUN apt-get -y update && apt-get install -y build-essential zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm \
libncurses5-dev libncursesw5-dev xz-utils libffi-dev liblzma-dev && \
# install git and sqlite3
RUN apt-get -y update && \
apt-get install -y software-properties-common && \
apt-get install git -y && apt-get install -y sqlite3
apt-get update && apt-get install git -y

RUN apt-get install -y sqlite3

ARG LANGUAGE=EN
ENV LANGUAGE ${LANGUAGE}
Expand All @@ -25,15 +28,14 @@ ARG SED_ARG=" | "
ENV CONFIG=$CONFIG
ENV PORT=$PORT

COPY ./annotators/custom_entity_linking/requirements.txt /src/requirements.txt
RUN pip install -r /src/requirements.txt
RUN python -c "import nltk; nltk.download('stopwords')"
COPY ./annotators/custom_entity_linking/requirements.txt ./requirements.txt
RUN pip install -r ./requirements.txt

COPY $SRC_DIR .

COPY $SRC_DIR /src

WORKDIR /src
RUN python -m deeppavlov install $CONFIG

RUN sed -i "s|$SED_ARG|g" "$CONFIG"

CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:8153
CMD gunicorn --workers=1 --timeout 500 server:app -b 0.0.0.0:${PORT}
4 changes: 2 additions & 2 deletions annotators/custom_entity_linking/custom_entity_linking.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"chainer": {
"in": ["user_ids", "entity_substr", "entity_tags", "sentences"],
"in": ["user_ids", "entity_substr", "entity_tags"],
"pipe": [
{
"class_name": "src.entity_linking:EntityLinker",
"in": ["user_ids", "entity_substr", "entity_tags", "sentences"],
"in": ["user_ids", "entity_substr", "entity_tags"],
"out": ["entity_ids", "entity_conf", "entity_id_tags"],
"load_path": "{DOWNLOADS_PATH}/entity_linking_eng/custom_el_eng_dream",
"rank_in_runtime": true,
Expand Down
158 changes: 91 additions & 67 deletions annotators/custom_entity_linking/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,82 +37,106 @@ def add_entities():
return {}


def preprocess_hist_utt(context_batch):
    """Trim each dialog history down to (at most) its last two utterances.

    One-character utterances are dropped, and every kept utterance gets a
    trailing period unless it already ends in sentence-final punctuation.

    Args:
        context_batch: list of dialog histories, each a list of utterance
            strings (most recent last).

    Returns:
        A list of the same length as ``context_batch``; each element holds
        the last one or two normalized utterances, or an empty list when the
        history contains no usable utterance.
    """
    opt_context_batch = []
    for hist_utt in context_batch:
        # Drop degenerate one-character utterances (e.g. stray punctuation).
        hist_utt = [utt for utt in hist_utt if len(utt) > 1]
        # Bug fix: the original indexed hist_utt[-1] unconditionally and
        # raised IndexError when the filtered history was empty.
        if not hist_utt:
            opt_context_batch.append([])
            continue
        last_utt = hist_utt[-1]
        if last_utt[-1] not in {".", "!", "?"}:
            last_utt = f"{last_utt}."
        if len(hist_utt) > 1:
            prev_utt = hist_utt[-2]
            if prev_utt[-1] not in {".", "!", "?"}:
                prev_utt = f"{prev_utt}."
            opt_context_batch.append([prev_utt, last_utt])
        else:
            opt_context_batch.append([last_utt])
    return opt_context_batch


def define_entity_info(prex_info, context, substr_list, ids_list, conf_list, id_tags_list):
    """Assemble per-entity linking info, filtering out abstract candidates.

    Candidate ids whose tag starts with ``Abstract`` are kept only when the
    extracted relation is an abstract one and the substring is not preceded
    in the context by a determiner/possessive ("the", "my", "his", "her").

    Returns a list of dicts with keys ``entity_substr``, ``entity_ids``,
    ``confidences``, ``tokens_match_conf``, ``entity_id_tags``; substrings
    left with no surviving candidates are omitted.
    """
    # property_extraction output may arrive wrapped in a list; unwrap it.
    if isinstance(prex_info, list) and prex_info:
        prex_info = prex_info[0]
    triplet = prex_info.get("triplet", {}) if prex_info else {}
    rel = triplet.get("relation", triplet.get("property", ""))

    entity_info_list = []
    for substr, ids, confs, id_tags in zip(substr_list, ids_list, conf_list, id_tags_list):
        has_determiner = any(f" {word} {substr}" in context for word in ["the", "my", "his", "her"])
        is_abstract = rel.lower().replace("_", " ") in abstract_rels and not has_determiner

        # Keep a candidate unless it is abstract-tagged while the relation is not abstract.
        kept = [
            (entity_id, conf, id_tag)
            for entity_id, conf, id_tag in zip(ids, confs, id_tags)
            if is_abstract or not id_tag.startswith("Abstract")
        ]
        if kept:
            f_ids, f_confs, f_id_tags = (list(column) for column in zip(*kept))
            entity_info_list.append(
                {
                    "entity_substr": substr,
                    "entity_ids": f_ids,
                    "confidences": [float(elem[2]) for elem in f_confs],
                    "tokens_match_conf": [float(elem[0]) for elem in f_confs],
                    "entity_id_tags": f_id_tags,
                }
            )
    return entity_info_list


@app.route("/model", methods=["POST"])
def respond():
st_time = time.time()
user_ids = request.json.get("user_id", [""])
substr_batch = request.json.get("entity_substr", [[""]])
tags_batch = request.json.get("entity_tags", [["" for _ in substr_list] for substr_list in substr_batch])
entity_substr_batch = request.json.get("entity_substr", [[""]])
entity_tags_batch = request.json.get(
"entity_tags",
[["" for _ in entity_substr_list] for entity_substr_list in entity_substr_batch],
)
context_batch = request.json.get("context", [[""]])
prex_info_batch = request.json.get("property_extraction", [dict() for _ in substr_batch])
opt_context_batch = preprocess_hist_utt(context_batch)
prex_info_batch = request.json.get("property_extraction", [{} for _ in entity_substr_batch])
opt_context_batch = []
logger.info(f"init context: {context_batch}")
for hist_uttr in context_batch:
if len(hist_uttr) == 1:
opt_context_batch.append(hist_uttr[0])
else:
prev_uttr = hist_uttr[-2]
cur_uttr = hist_uttr[-1]
is_q = (
any([prev_uttr.startswith(q_word) for q_word in ["what ", "who ", "when ", "where "]])
or "?" in prev_uttr
)
if is_q and len(cur_uttr.split()) < 3:
opt_context_batch.append(f"{prev_uttr} {cur_uttr}")
else:
opt_context_batch.append(cur_uttr)

entity_info_batch = [[dict()] for _ in substr_batch]
logger.info(f"context batch: {opt_context_batch}")
entity_info_batch = [[{}] for _ in entity_substr_batch]
try:
substr_batch, ids_batch, conf_batch, id_tags_batch = el(user_ids, substr_batch, tags_batch, opt_context_batch)
(
entity_substr_batch,
entity_ids_batch,
conf_batch,
entity_id_tags_batch,
) = el(user_ids, entity_substr_batch, entity_tags_batch)
entity_info_batch = []
for (substr_list, ids_list, conf_list, id_tags_list, prex_info, context) in zip(
substr_batch, ids_batch, conf_batch, id_tags_batch, prex_info_batch, opt_context_batch
for (
entity_substr_list,
entity_ids_list,
conf_list,
entity_id_tags_list,
prex_info,
context,
) in zip(
entity_substr_batch,
entity_ids_batch,
conf_batch,
entity_id_tags_batch,
prex_info_batch,
opt_context_batch,
):
if context:
context = " ".join(context)
else:
context = ""
entity_info_list = define_entity_info(prex_info, context, substr_list, ids_list, conf_list, id_tags_list)
entity_info_list = []
triplets = {}
if isinstance(prex_info, list) and prex_info:
prex_info = prex_info[0]
if prex_info:
triplets = prex_info.get("triplets", {})
obj2rel_dict = {}
for triplet in triplets:
obj = triplet["object"].lower()
if "relation" in triplet:
rel = triplet["relation"]
elif "property" in triplet:
rel = triplet["property"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

может else: rel = "" для красоты?

else:
rel = ""
obj2rel_dict[obj] = rel
for entity_substr, entity_ids, confs, entity_id_tags in zip(
entity_substr_list,
entity_ids_list,
conf_list,
entity_id_tags_list,
):
entity_info = {}
entity_substr = entity_substr.lower()
logger.info(f"context -- {context}")
context = context.lower()
curr_rel = obj2rel_dict.get(entity_substr, "")
is_abstract = curr_rel.lower().replace("_", " ") in abstract_rels and not any(
[f" {word} {entity_substr}" in context for word in ["the", "my", "his", "her"]]
)

f_entity_ids, f_confs, f_entity_id_tags = [], [], []
for entity_id, conf, entity_id_tag in zip(entity_ids, confs, entity_id_tags):
if entity_id_tag.startswith("Abstract") and not is_abstract:
pass
else:
f_entity_ids.append(entity_id)
f_confs.append(conf)
f_entity_id_tags.append(entity_id_tag)

if f_entity_ids and entity_substr in context:
entity_info["entity_substr"] = entity_substr
entity_info["entity_ids"] = f_entity_ids
entity_info["confidences"] = [float(elem[2]) for elem in f_confs]
entity_info["tokens_match_conf"] = [float(elem[0]) for elem in f_confs]
entity_info["entity_id_tags"] = f_entity_id_tags
entity_info_list.append(entity_info)
entity_info_batch.append(entity_info_list)
except Exception as e:
sentry_sdk.capture_exception(e)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Environment for the custom_entity_linking annotator container.
# CONFIG is the DeepPavlov pipeline config consumed by server.py.
CONFIG: custom_entity_linking.json
# NOTE(review): 8153 is also hard-coded in the service descriptor's
# build args and port mapping — keep the three in sync.
SERVICE_PORT: 8153
SRC_DIR: annotators/custom_entity_linking/
SERVICE_NAME: custom_entity_linking
# Flask application module (server.py) loaded via `flask run` / gunicorn.
FLASK_APP: server
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
name: custom-entity-linking
endpoints:
  - model
  - add_entities
compose:
  env_file:
    - .env
  build:
    args:
      CONFIG: custom_entity_linking.json
      PORT: 8153
      SRC_DIR: annotators/custom_entity_linking
    context: ./
    dockerfile: annotators/custom_entity_linking/Dockerfile
  deploy:
    resources:
      limits:
        memory: 128M
      reservations:
        memory: 128M
  volumes:
    - "./annotators/custom_entity_linking:/src"
    - "~/.deeppavlov:/root/.deeppavlov"
  ports:
    - 8153:8153
proxy: null
Loading