Skip to content

Commit

Permalink
fix(ner-tagger): Fix model version
Browse files Browse the repository at this point in the history
Use a version of the model compatible with the version of flair used

Fix #156
  • Loading branch information
parmentf committed Aug 7, 2024
1 parent 052cbe0 commit 830e1ff
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
2 changes: 2 additions & 0 deletions services/ner-tagger/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ RUN --mount=type=secret,id=webdav_login \
dvc remote add -d webdav-remote "$(cat /run/secrets/webdav_url)" && \
dvc remote modify --local webdav-remote user "$(cat /run/secrets/webdav_login)" && \
dvc remote modify --local webdav-remote password "$(cat /run/secrets/webdav_password)"
+RUN curl -L https://huggingface.co/flair/ner-english/resolve/8280adae5dba43c6bea2e89986044fd50394f951/pytorch_model.bin?download=true -o /dvc/ner-english-model
COPY ./v1/v1.dvc /dvc
RUN dvc pull -v

Expand All @@ -31,3 +32,4 @@ COPY --chown=daemon:daemon . /app/public/
RUN mv ./config.json /app && chmod a+w /app/config.json
# Copy dvc files
COPY --chown=daemon:daemon --from=dvc-files /dvc/v1 /app/public/v1
+COPY --chown=daemon:daemon --from=dvc-files /dvc/ner-english-model /app/ner-english-model
12 changes: 6 additions & 6 deletions services/ner-tagger/v1/geoTagger/geoTagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@
from flair.data import Sentence
from flair.models import SequenceTagger

-logging.getLogger('flair').handlers[0].stream = sys.stderr
+logging.getLogger("flair").handlers[0].stream = sys.stderr

-tagger = SequenceTagger.load("flair/ner-english")
+tagger = SequenceTagger.load("/app/ner-english-model")

 for line in sys.stdin:
     data = json.loads(line)
-    text = data['value']
+    text = data["value"]
     sent = text.split(".")
     sentences = [Sentence(sent[i] + ".") for i in range(len(sent))]
     tagger.predict(sentences)
     geo = []

     for sentence in sentences:
-        for entity in sentence.get_spans('ner'):
+        for entity in sentence.get_spans("ner"):
             if entity.tag == "LOC":
                 geo.append(entity.text)
-    data['value'] = geo
+    data["value"] = geo
     sys.stdout.write(json.dumps(data))
-    sys.stdout.write('\n')
+    sys.stdout.write("\n")

0 comments on commit 830e1ff

Please sign in to comment.