diff --git a/services/ner-tagger/Dockerfile b/services/ner-tagger/Dockerfile index 1c82b690..5e6b2c72 100644 --- a/services/ner-tagger/Dockerfile +++ b/services/ner-tagger/Dockerfile @@ -15,6 +15,7 @@ RUN --mount=type=secret,id=webdav_login \ dvc remote add -d webdav-remote "$(cat /run/secrets/webdav_url)" && \ dvc remote modify --local webdav-remote user "$(cat /run/secrets/webdav_login)" && \ dvc remote modify --local webdav-remote password "$(cat /run/secrets/webdav_password)" +RUN curl -L https://huggingface.co/flair/ner-english/resolve/8280adae5dba43c6bea2e89986044fd50394f951/pytorch_model.bin?download=true -o /dvc/ner-english-model COPY ./v1/v1.dvc /dvc RUN dvc pull -v @@ -31,3 +32,4 @@ COPY --chown=daemon:daemon . /app/public/ RUN mv ./config.json /app && chmod a+w /app/config.json # Copy dvc files COPY --chown=daemon:daemon --from=dvc-files /dvc/v1 /app/public/v1 +COPY --chown=daemon:daemon --from=dvc-files /dvc/ner-english-model /app/ner-english-model diff --git a/services/ner-tagger/v1/geoTagger/geoTagger.py b/services/ner-tagger/v1/geoTagger/geoTagger.py index 6d7c14dd..522ae119 100755 --- a/services/ner-tagger/v1/geoTagger/geoTagger.py +++ b/services/ner-tagger/v1/geoTagger/geoTagger.py @@ -7,22 +7,22 @@ from flair.data import Sentence from flair.models import SequenceTagger -logging.getLogger('flair').handlers[0].stream = sys.stderr +logging.getLogger("flair").handlers[0].stream = sys.stderr -tagger = SequenceTagger.load("flair/ner-english") +tagger = SequenceTagger.load("/app/ner-english-model") for line in sys.stdin: data = json.loads(line) - text = data['value'] + text = data["value"] sent = text.split(".") sentences = [Sentence(sent[i] + ".") for i in range(len(sent))] tagger.predict(sentences) geo = [] for sentence in sentences: - for entity in sentence.get_spans('ner'): + for entity in sentence.get_spans("ner"): if entity.tag == "LOC": geo.append(entity.text) - data['value'] = geo + data["value"] = geo sys.stdout.write(json.dumps(data)) - sys.stdout.write('\n') + sys.stdout.write("\n")